1 /*
2     Implementation of backtracking std.regex engine.
3     Contains both compile-time and run-time versions.
4 */
5 module std.regex.internal.backtracking;
6 
7 package(std.regex):
8 
9 import core.stdc.stdlib, std.range.primitives, std.traits, std.typecons;
10 import std.regex.internal.ir;
11 
12 import core.memory : pureMalloc, pureFree;
13 
14 /+
15     BacktrackingMatcher implements backtracking scheme of matching
16     regular expressions.
17 +/
18 @trusted class BacktrackingMatcher(Char, Stream = Input!Char) : Matcher!Char
19 if (is(Char : dchar))
20 {
21     alias DataIndex = Stream.DataIndex;
22     struct State
23     {//top bit in pc is set if saved along with matches
24         DataIndex index;
25         uint pc, counter, infiniteNesting;
26     }
27     static assert(State.sizeof % size_t.sizeof == 0);
28     enum stateSize = State.sizeof / size_t.sizeof;
29     enum initialStack = 1 << 11; // items in a block of segmented stack
30     alias String = const(Char)[];
31     alias RegEx = Regex!Char;
32     alias MatchFn = bool function(BacktrackingMatcher) pure;
33     const RegEx re;         // regex program
34     MatchFn nativeFn; // native code for that program
35     // Stream state
36     Stream s;
37     DataIndex index;
38     dchar front;
39     bool exhausted;
40     // Backtracking machine state
41     uint pc, counter;
42     DataIndex lastState = 0;    // Top of state stack
43     uint infiniteNesting;
44     size_t[] memory;
45     Trace[]  merge;
46     static struct Trace
47     {
48         ulong mask;
49         size_t offset;
50 
markTrace51         bool mark(size_t idx)
52         {
53             immutable d = idx - offset;
54             if (d < 64) // including overflow
55             {
56                 immutable p = mask & (1UL << d);
57                 mask |= 1UL << d;
58                 return p != 0;
59             }
60             else
61             {
62                 offset = idx;
63                 mask = 1;
64                 return false;
65             }
66         }
67     }
68     //local slice of matches, global for backref
69     Group!DataIndex[] matches, backrefed;
70     size_t _refCount;
71 final:
72 
refCount()73     override @property ref size_t refCount() { return _refCount; }
pattern()74     override @property ref const(RegEx) pattern(){ return re; }
75 
76     static if (__traits(hasMember,Stream, "search"))
77     {
78         enum kicked = true;
79     }
80     else
81         enum kicked = false;
82 
initialMemory(const ref RegEx re)83     static size_t initialMemory(const ref RegEx re)
84     {
85         return stackSize(re)*size_t.sizeof + re.hotspotTableSize*Trace.sizeof;
86     }
87 
stackSize(const ref RegEx re)88     static size_t stackSize(const ref RegEx re)
89     {
90         size_t itemSize = stateSize
91             + re.ngroup * (Group!DataIndex).sizeof / size_t.sizeof;
92         return initialStack * itemSize + 2;
93     }
94 
atStart()95     @property bool atStart(){ return index == 0; }
96 
atEnd()97     @property bool atEnd(){ return index == s.lastIndex && s.atEnd; }
98 
next()99     void next()
100     {
101         if (!s.nextChar(front, index))
102             index = s.lastIndex;
103     }
104 
search()105     void search()
106     {
107         static if (kicked)
108         {
109             if (!s.search(re.kickstart, front, index))
110             {
111                 index = s.lastIndex;
112             }
113         }
114         else
115             next();
116     }
117 
118     //
newStack()119     void newStack()
120     {
121         auto chunk = mallocArray!(size_t)(stackSize(re));
122         chunk[0] = cast(size_t)(memory.ptr);
123         chunk[1] = lastState;
124         memory = chunk[2..$];
125         lastState = 0;
126     }
127 
prevStack()128     bool prevStack()
129     {
130         // pointer to previous block
131         size_t* prev = cast(size_t*) memory.ptr[-2];
132         if (!prev)
133         {
134             // The last segment is freed in RegexMatch
135             return false;
136         }
137         else
138         {
139             import core.memory : pureFree;
140             // memory used in previous block
141             size_t size = memory.ptr[-1];
142             pureFree(memory.ptr-2);
143             memory = prev[0 .. size];
144             lastState = size;
145             return true;
146         }
147     }
148 
initExternalMemory(void[]memBlock)149     void initExternalMemory(void[] memBlock)
150     {
151         merge = arrayInChunk!(Trace)(re.hotspotTableSize, memBlock);
152         merge[] = Trace.init;
153         memory = cast(size_t[]) memBlock;
154         memory[0] = 0; // hidden pointer
155         memory[1] = 0; // used size
156         memory = memory[2..$];
157     }
158 
initialize(ref const RegEx program,Stream stream,void[]memBlock)159     void initialize(ref const RegEx program, Stream stream, void[] memBlock)
160     {
161         s = stream;
162         exhausted = false;
163         initExternalMemory(memBlock);
164         backrefed = null;
165     }
166 
167     override void dupTo(Matcher!Char m, void[] memBlock)
168     {
169         auto backtracking = cast(BacktrackingMatcher) m;
170         backtracking.s = s;
171         backtracking.front = front;
172         backtracking.index = index;
173         backtracking.exhausted = exhausted;
174         backtracking.initExternalMemory(memBlock);
175     }
176 
177     override Matcher!Char rearm(in Char[] data)
178     {
179         merge[] = Trace.init;
180         exhausted = false;
181         s = Stream(data);
182         next();
183         return this;
184     }
185 
this(ref const RegEx program,Stream stream,void[]memBlock,dchar ch,DataIndex idx)186     this(ref const RegEx program, Stream stream, void[] memBlock, dchar ch, DataIndex idx)
187     {
188         _refCount = 1;
189         re = program;
190         nativeFn = null;
191         initialize(program, stream, memBlock);
192         front = ch;
193         index = idx;
194     }
195 
this(ref const RegEx program,MatchFn func,Stream stream,void[]memBlock)196     this(ref const RegEx program, MatchFn func, Stream stream, void[] memBlock)
197     {
198         _refCount = 1;
199         re = program;
200         initialize(program, stream, memBlock);
201         nativeFn = func;
202         next();
203     }
204 
this(ref const RegEx program,Stream stream,void[]memBlock)205     this(ref const RegEx program, Stream stream, void[] memBlock)
206     {
207         _refCount = 1;
208         re = program;
209         nativeFn = null;
210         initialize(program, stream, memBlock);
211         next();
212     }
213 
fwdMatcher(ref const RegEx re,void[]memBlock)214     auto fwdMatcher(ref const RegEx re, void[] memBlock)
215     {
216         alias BackMatcher = BacktrackingMatcher!(Char, Stream);
217         auto fwdMatcher = new BackMatcher(re, s, memBlock, front, index);
218         return fwdMatcher;
219     }
220 
bwdMatcher(ref const RegEx re,void[]memBlock)221     auto bwdMatcher(ref const RegEx re, void[] memBlock)
222     {
223         alias BackMatcher = BacktrackingMatcher!(Char, typeof(s.loopBack(index)));
224         auto fwdMatcher =
225             new BackMatcher(re, s.loopBack(index), memBlock);
226         return fwdMatcher;
227     }
228 
229     //
matchFinalize()230     int matchFinalize()
231     {
232         immutable start = index;
233         immutable val = matchImpl();
234         if (val)
235         {//stream is updated here
236             matches[0].begin = start;
237             matches[0].end = index;
238             if (!(re.flags & RegexOption.global) || atEnd)
239                 exhausted = true;
240             if (start == index)//empty match advances input
241                 next();
242             return val;
243         }
244         else
245             return 0;
246     }
247 
248     //lookup next match, fill matches with indices into input
249     override int match(Group!DataIndex[] matches)
250     {
debug(std_regex_matcher)251         debug(std_regex_matcher)
252         {
253             writeln("------------------------------------------");
254         }
255         if (exhausted) //all matches collected
256             return false;
257         this.matches = matches;
258         if (re.flags & RegexInfo.oneShot)
259         {
260             exhausted = true;
261             const DataIndex start = index;
262             immutable m = matchImpl();
263             if (m)
264             {
265                 matches[0].begin = start;
266                 matches[0].end = index;
267             }
268             return m;
269         }
270         static if (kicked)
271         {
272             if (!re.kickstart.empty)
273             {
274                 for (;;)
275                 {
276                     immutable val = matchFinalize();
277                     if (val)
278                         return val;
279                     else
280                     {
281                         if (atEnd)
282                             break;
283                         search();
284                         if (atEnd)
285                         {
286                             exhausted = true;
287                             return matchFinalize();
288                         }
289                     }
290                 }
291                 exhausted = true;
292                 return 0; //early return
293             }
294         }
295         //no search available - skip a char at a time
296         for (;;)
297         {
298             immutable val = matchFinalize();
299             if (val)
300                 return val;
301             else
302             {
303                 if (atEnd)
304                     break;
305                 next();
306                 if (atEnd)
307                 {
308                     exhausted = true;
309                     return matchFinalize();
310                 }
311             }
312         }
313         exhausted = true;
314         return 0;
315     }
316 
317     /+
318         match subexpression against input,
319         results are stored in matches
320     +/
matchImpl()321     int matchImpl() pure
322     {
323         if (nativeFn)
324         {
325             debug(std_regex_ctr) writeln("using C-T matcher");
326             return nativeFn(this);
327         }
328         else
329         {
330             pc = 0;
331             counter = 0;
332             lastState = 0;
333             infiniteNesting = 0;
334             matches[] = Group!DataIndex.init;
335             auto start = s._index;
336             debug(std_regex_matcher)
337                 writeln("Try match starting at ", s[index .. s.lastIndex]);
338             for (;;)
339             {
340                 debug(std_regex_matcher)
341                     writefln("PC: %s\tCNT: %s\t%s \tfront: %s src: %s",
342                         pc, counter, disassemble(re.ir, pc, re.dict),
343                         front, s._index);
344                 switch (re.ir[pc].code)
345                 {
346                 case IR.OrChar://assumes IRL!(OrChar) == 1
347                     if (atEnd)
348                         goto L_backtrack;
349                     uint len = re.ir[pc].sequence;
350                     uint end = pc + len;
351                     if (re.ir[pc].data != front && re.ir[pc+1].data != front)
352                     {
353                         for (pc = pc+2; pc < end; pc++)
354                             if (re.ir[pc].data == front)
355                                 break;
356                         if (pc == end)
357                             goto L_backtrack;
358                     }
359                     pc = end;
360                     next();
361                     break;
362                 case IR.Char:
363                     if (atEnd || front != re.ir[pc].data)
364                         goto L_backtrack;
365                     pc += IRL!(IR.Char);
366                     next();
367                 break;
368                 case IR.Any:
369                     if (atEnd)
370                         goto L_backtrack;
371                     pc += IRL!(IR.Any);
372                     next();
373                     break;
374                 case IR.CodepointSet:
375                     if (atEnd || !re.charsets[re.ir[pc].data].scanFor(front))
376                         goto L_backtrack;
377                     next();
378                     pc += IRL!(IR.CodepointSet);
379                     break;
380                 case IR.Trie:
381                     if (atEnd || !re.matchers[re.ir[pc].data][front])
382                         goto L_backtrack;
383                     next();
384                     pc += IRL!(IR.Trie);
385                     break;
386                 case IR.Wordboundary:
387                     dchar back;
388                     DataIndex bi;
389                     //at start & end of input
390                     if (atStart && wordMatcher[front])
391                     {
392                         pc += IRL!(IR.Wordboundary);
393                         break;
394                     }
395                     else if (atEnd && s.loopBack(index).nextChar(back, bi)
396                             && wordMatcher[back])
397                     {
398                         pc += IRL!(IR.Wordboundary);
399                         break;
400                     }
401                     else if (s.loopBack(index).nextChar(back, bi))
402                     {
403                         immutable af = wordMatcher[front];
404                         immutable ab = wordMatcher[back];
405                         if (af ^ ab)
406                         {
407                             pc += IRL!(IR.Wordboundary);
408                             break;
409                         }
410                     }
411                     goto L_backtrack;
412                 case IR.Notwordboundary:
413                     dchar back;
414                     DataIndex bi;
415                     //at start & end of input
416                     if (atStart && wordMatcher[front])
417                         goto L_backtrack;
418                     else if (atEnd && s.loopBack(index).nextChar(back, bi)
419                             && wordMatcher[back])
420                         goto L_backtrack;
421                     else if (s.loopBack(index).nextChar(back, bi))
422                     {
423                         immutable af = wordMatcher[front];
424                         immutable ab = wordMatcher[back];
425                         if (af ^ ab)
426                             goto L_backtrack;
427                     }
428                     pc += IRL!(IR.Wordboundary);
429                     break;
430                 case IR.Bof:
431                     if (atStart)
432                         pc += IRL!(IR.Bol);
433                     else
434                         goto L_backtrack;
435                     break;
436                 case IR.Bol:
437                     dchar back;
438                     DataIndex bi;
439                     if (atStart)
440                         pc += IRL!(IR.Bol);
441                     else if (s.loopBack(index).nextChar(back,bi)
442                         && endOfLine(back, front == '\n'))
443                     {
444                         pc += IRL!(IR.Bol);
445                     }
446                     else
447                         goto L_backtrack;
448                     break;
449                 case IR.Eof:
450                     if (atEnd)
451                         pc += IRL!(IR.Eol);
452                     else
453                         goto L_backtrack;
454                     break;
455                 case IR.Eol:
456                     dchar back;
457                     DataIndex bi;
458                     debug(std_regex_matcher) writefln("EOL (front 0x%x) %s", front, s[index .. s.lastIndex]);
459                     //no matching inside \r\n
460                     if (atEnd || (endOfLine(front, s.loopBack(index).nextChar(back,bi)
461                             && back == '\r')))
462                     {
463                         pc += IRL!(IR.Eol);
464                     }
465                     else
466                         goto L_backtrack;
467                     break;
468                 case IR.InfiniteStart, IR.InfiniteQStart:
469                     pc += re.ir[pc].data + IRL!(IR.InfiniteStart);
470                     //now pc is at end IR.Infinite(Q)End
471                     uint len = re.ir[pc].data;
472                     if (re.ir[pc].code == IR.InfiniteEnd)
473                     {
474                         pushState(pc+IRL!(IR.InfiniteEnd), counter);
475                         pc -= len;
476                     }
477                     else
478                     {
479                         pushState(pc - len, counter);
480                         pc += IRL!(IR.InfiniteEnd);
481                     }
482                     break;
483                 case IR.InfiniteBloomStart:
484                     pc += re.ir[pc].data + IRL!(IR.InfiniteBloomStart);
485                     //now pc is at end IR.InfiniteBloomEnd
486                     immutable len = re.ir[pc].data;
487                     immutable filterIdx = re.ir[pc+2].raw;
488                     if (re.filters[filterIdx][front])
489                         pushState(pc+IRL!(IR.InfiniteBloomEnd), counter);
490                     pc -= len;
491                     break;
492                 case IR.RepeatStart, IR.RepeatQStart:
493                     pc += re.ir[pc].data + IRL!(IR.RepeatStart);
494                     break;
495                 case IR.RepeatEnd:
496                 case IR.RepeatQEnd:
497                     if (merge[re.ir[pc + 1].raw+counter].mark(index))
498                     {
499                         // merged!
500                         goto L_backtrack;
501                     }
502                     //len, step, min, max
503                     immutable len = re.ir[pc].data;
504                     immutable step =  re.ir[pc+2].raw;
505                     immutable min = re.ir[pc+3].raw;
506                     immutable max = re.ir[pc+4].raw;
507                     if (counter < min)
508                     {
509                         counter += step;
510                         pc -= len;
511                     }
512                     else if (counter < max)
513                     {
514                         if (re.ir[pc].code == IR.RepeatEnd)
515                         {
516                             pushState(pc + IRL!(IR.RepeatEnd), counter%step);
517                             counter += step;
518                             pc -= len;
519                         }
520                         else
521                         {
522                             pushState(pc - len, counter + step);
523                             counter = counter%step;
524                             pc += IRL!(IR.RepeatEnd);
525                         }
526                     }
527                     else
528                     {
529                         counter = counter%step;
530                         pc += IRL!(IR.RepeatEnd);
531                     }
532                     break;
533                 case IR.InfiniteEnd:
534                 case IR.InfiniteQEnd:
535                     debug(std_regex_matcher) writeln("Infinited nesting:", infiniteNesting);
536                     if (merge[re.ir[pc + 1].raw+counter].mark(index))
537                     {
538                         // merged!
539                         goto L_backtrack;
540                     }
541                     immutable len = re.ir[pc].data;
542                     if (re.ir[pc].code == IR.InfiniteEnd)
543                     {
544                         pushState(pc + IRL!(IR.InfiniteEnd), counter);
545                         pc -= len;
546                     }
547                     else
548                     {
549                         pushState(pc-len, counter);
550                         pc += IRL!(IR.InfiniteEnd);
551                     }
552                     break;
553                 case IR.InfiniteBloomEnd:
554                     debug(std_regex_matcher) writeln("Infinited nesting:", infiniteNesting);
555                     if (merge[re.ir[pc + 1].raw+counter].mark(index))
556                     {
557                         // merged!
558                         goto L_backtrack;
559                     }
560                     immutable len = re.ir[pc].data;
561                     immutable filterIdx = re.ir[pc+2].raw;
562                     if (re.filters[filterIdx][front])
563                     {
564                         infiniteNesting--;
565                         pushState(pc + IRL!(IR.InfiniteBloomEnd), counter);
566                         infiniteNesting++;
567                     }
568                     pc -= len;
569                     break;
570                 case IR.OrEnd:
571                     if (merge[re.ir[pc + 1].raw+counter].mark(index))
572                     {
573                         // merged!
574                         goto L_backtrack;
575                     }
576                     pc += IRL!(IR.OrEnd);
577                     break;
578                 case IR.OrStart:
579                     pc += IRL!(IR.OrStart);
580                     goto case;
581                 case IR.Option:
582                     immutable len = re.ir[pc].data;
583                     if (re.ir[pc+len].code == IR.GotoEndOr)//not a last one
584                     {
585                         pushState(pc + len + IRL!(IR.Option), counter); //remember 2nd branch
586                     }
587                     pc += IRL!(IR.Option);
588                     break;
589                 case IR.GotoEndOr:
590                     pc = pc + re.ir[pc].data + IRL!(IR.GotoEndOr);
591                     break;
592                 case IR.GroupStart:
593                     immutable n = re.ir[pc].data;
594                     matches[n].begin = index;
595                     debug(std_regex_matcher)  writefln("IR group #%u starts at %u", n, index);
596                     pc += IRL!(IR.GroupStart);
597                     break;
598                 case IR.GroupEnd:
599                     immutable n = re.ir[pc].data;
600                     matches[n].end = index;
601                     debug(std_regex_matcher) writefln("IR group #%u ends at %u", n, index);
602                     pc += IRL!(IR.GroupEnd);
603                     break;
604                 case IR.LookaheadStart:
605                 case IR.NeglookaheadStart:
606                     immutable len = re.ir[pc].data;
607                     auto save = index;
608                     immutable ms = re.ir[pc+1].raw, me = re.ir[pc+2].raw;
609                     auto mem = pureMalloc(initialMemory(re))[0 .. initialMemory(re)];
610                     scope(exit) pureFree(mem.ptr);
611                     auto slicedRe = re.withCode(re.ir[
612                         pc+IRL!(IR.LookaheadStart) .. pc+IRL!(IR.LookaheadStart)+len+IRL!(IR.LookaheadEnd)
613                     ]);
614                     static if (Stream.isLoopback)
615                     {
616                         auto matcher = bwdMatcher(slicedRe, mem);
617                     }
618                     else
619                     {
620                         auto matcher = fwdMatcher(slicedRe, mem);
621                     }
622                     matcher.matches = matches[ms .. me];
623                     matcher.backrefed = backrefed.empty ? matches : backrefed;
624                     immutable match = (matcher.matchImpl() != 0) ^ (re.ir[pc].code == IR.NeglookaheadStart);
625                     s.reset(save);
626                     next();
627                     if (!match)
628                         goto L_backtrack;
629                     else
630                     {
631                         pc += IRL!(IR.LookaheadStart)+len+IRL!(IR.LookaheadEnd);
632                     }
633                     break;
634                 case IR.LookbehindStart:
635                 case IR.NeglookbehindStart:
636                     immutable len = re.ir[pc].data;
637                     immutable ms = re.ir[pc+1].raw, me = re.ir[pc+2].raw;
638                     auto mem = pureMalloc(initialMemory(re))[0 .. initialMemory(re)];
639                     scope(exit) pureFree(mem.ptr);
640                     auto slicedRe = re.withCode(re.ir[
641                         pc + IRL!(IR.LookbehindStart) .. pc + IRL!(IR.LookbehindStart) + len + IRL!(IR.LookbehindEnd)
642                     ]);
643                     static if (Stream.isLoopback)
644                     {
645                         alias Matcher = BacktrackingMatcher!(Char, Stream);
646                         auto matcher = new Matcher(slicedRe, s, mem, front, index);
647                     }
648                     else
649                     {
650                         alias Matcher = BacktrackingMatcher!(Char, typeof(s.loopBack(index)));
651                         auto matcher = new Matcher(slicedRe, s.loopBack(index), mem);
652                     }
653                     matcher.matches = matches[ms .. me];
654                     matcher.backrefed  = backrefed.empty ? matches : backrefed;
655                     immutable match = (matcher.matchImpl() != 0) ^ (re.ir[pc].code == IR.NeglookbehindStart);
656                     if (!match)
657                         goto L_backtrack;
658                     else
659                     {
660                         pc += IRL!(IR.LookbehindStart)+len+IRL!(IR.LookbehindEnd);
661                     }
662                     break;
663                 case IR.Backref:
664                     immutable n = re.ir[pc].data;
665                     auto referenced = re.ir[pc].localRef
666                             ? s[matches[n].begin .. matches[n].end]
667                             : s[backrefed[n].begin .. backrefed[n].end];
668                     while (!atEnd && !referenced.empty && front == referenced.front)
669                     {
670                         next();
671                         referenced.popFront();
672                     }
673                     if (referenced.empty)
674                         pc++;
675                     else
676                         goto L_backtrack;
677                     break;
678                     case IR.Nop:
679                     pc += IRL!(IR.Nop);
680                     break;
681                 case IR.LookaheadEnd:
682                 case IR.NeglookaheadEnd:
683                 case IR.LookbehindEnd:
684                 case IR.NeglookbehindEnd:
685                 case IR.End:
686                     // cleanup stale stack blocks if any
687                     while (prevStack()) {}
688                     return re.ir[pc].data;
689                 default:
690                     debug printBytecode(re.ir[0..$]);
691                     assert(0);
692                 L_backtrack:
693                     if (!popState())
694                     {
695                         s.reset(start);
696                         return 0;
697                     }
698                 }
699             }
700         }
701         assert(0);
702     }
703 
stackAvail()704     @property size_t stackAvail()
705     {
706         return memory.length - lastState;
707     }
708 
709     void stackPush(T)(T val)
710         if (!isDynamicArray!T)
711     {
712         *cast(T*)&memory[lastState] = val;
713         enum delta = (T.sizeof+size_t.sizeof/2)/size_t.sizeof;
714         lastState += delta;
715         debug(std_regex_matcher) writeln("push element SP= ", lastState);
716     }
717 
stackPush(T)718     void stackPush(T)(T[] val)
719     {
720         static assert(T.sizeof % size_t.sizeof == 0);
721         (cast(T*)&memory[lastState])[0 .. val.length]
722             = val[0..$];
723         lastState += val.length*(T.sizeof/size_t.sizeof);
724         debug(std_regex_matcher) writeln("push array SP= ", lastState);
725     }
726 
727     void stackPop(T)(ref T val)
728         if (!isDynamicArray!T)
729     {
730         enum delta = (T.sizeof+size_t.sizeof/2)/size_t.sizeof;
731         lastState -= delta;
732         val = *cast(T*)&memory[lastState];
733         debug(std_regex_matcher) writeln("pop element SP= ", lastState);
734     }
735 
stackPop(T)736     void stackPop(T)(T[] val)
737     {
738         stackPop(val);  // call ref version
739     }
stackPop(T)740     void stackPop(T)(ref T[] val)
741     {
742         lastState -= val.length*(T.sizeof/size_t.sizeof);
743         val[0..$] = (cast(T*)&memory[lastState])[0 .. val.length];
744         debug(std_regex_matcher) writeln("pop array SP= ", lastState);
745     }
746     //helper function, saves engine state
pushState(uint pc,uint counter)747     void pushState(uint pc, uint counter)
748     {
749         if (stateSize + 2 * matches.length > stackAvail)
750         {
751             newStack();
752         }
753         *cast(State*)&memory[lastState] =
754             State(index, pc, counter, infiniteNesting);
755         lastState += stateSize;
756         memory[lastState .. lastState + 2 * matches.length] = (cast(size_t[]) matches)[];
757         lastState += 2*matches.length;
758         debug(std_regex_matcher)
759             writefln("Saved(pc=%s) front: %s src: %s",
760                 pc, front, s[index .. s.lastIndex]);
761     }
762 
763     //helper function, restores engine state
popState()764     bool popState()
765     {
766         if (!lastState && !prevStack())
767             return false;
768         lastState -= 2*matches.length;
769         auto pm = cast(size_t[]) matches;
770         pm[] = memory[lastState .. lastState + 2 * matches.length];
771         lastState -= stateSize;
772         State* state = cast(State*)&memory[lastState];
773         index = state.index;
774         pc = state.pc;
775         counter = state.counter;
776         infiniteNesting = state.infiniteNesting;
777         debug(std_regex_matcher)
778         {
779             writefln("Restored matches", front, s[index .. s.lastIndex]);
780             foreach (i, m; matches)
781                 writefln("Sub(%d) : %s..%s", i, m.begin, m.end);
782         }
783         s.reset(index);
784         next();
785         debug(std_regex_matcher)
786             writefln("Backtracked (pc=%s) front: %s src: %s",
787                 pc, front, s[index .. s.lastIndex]);
788         return true;
789     }
790 }
791 
792 //very shitty string formatter, $$ replaced with next argument converted to string
ctSub(U...)793 @trusted string ctSub( U...)(string format, U args)
794 {
795     import std.conv : to;
796     bool seenDollar;
797     foreach (i, ch; format)
798     {
799         if (ch == '$')
800         {
801             if (seenDollar)
802             {
803                 static if (args.length > 0)
804                 {
805                     return  format[0 .. i - 1] ~ to!string(args[0])
806                         ~ ctSub(format[i + 1 .. $], args[1 .. $]);
807                 }
808                 else
809                     assert(0);
810             }
811             else
812                 seenDollar = true;
813         }
814         else
815             seenDollar = false;
816 
817     }
818     return format;
819 }
820 
821 struct CtContext
822 {
823     import std.conv : to, text;
824     //dirty flags
825     bool counter;
826     //to mark the portion of matches to save
827     int match, total_matches;
828     int reserved;
829     const(CodepointInterval)[][] charsets;
830 
831 
832     //state of codegenerator
833     static struct CtState
834     {
835         string code;
836         int addr;
837     }
838 
thisCtContext839     this(Char)(ref const Regex!Char re)
840     {
841         match = 1;
842         reserved = 1; //first match is skipped
843         total_matches = re.ngroup;
844         foreach (ref set; re.charsets)
845         {
846             charsets ~= set.intervals;
847         }
848     }
849 
lookaroundCtContext850     CtContext lookaround(uint s, uint e)
851     {
852         CtContext ct;
853         ct.total_matches = e - s;
854         ct.match = 1;
855         return ct;
856     }
857 
858     //restore state having current context
restoreCodeCtContext859     string restoreCode()
860     {
861         string text;
862         //stack is checked in L_backtrack
863         text ~= counter
864             ? "
865                     stackPop(counter);"
866             : "
867                     counter = 0;";
868         if (match < total_matches)
869         {
870             text ~= ctSub("
871                     stackPop(matches[$$..$$]);", reserved, match);
872             text ~= ctSub("
873                     matches[$$..$] = typeof(matches[0]).init;", match);
874         }
875         else
876             text ~= ctSub("
877                     stackPop(matches[$$..$]);", reserved);
878         return text;
879     }
880 
881     //save state having current context
882     string saveCode(uint pc, string count_expr="counter")
883     {
884         string text = ctSub("
885                     if (stackAvail < $$*(Group!(DataIndex)).sizeof/size_t.sizeof + $$)
886                     {
887                         newStack();
888                     }", match - reserved, cast(int) counter + 2);
889         if (match < total_matches)
890             text ~= ctSub("
891                     stackPush(matches[$$..$$]);", reserved, match);
892         else
893             text ~= ctSub("
894                     stackPush(matches[$$..$]);", reserved);
895         text ~= counter ? ctSub("
896                     stackPush($$);", count_expr) : "";
897         text ~= ctSub("
898                     stackPush(index); stackPush($$); \n", pc);
899         return text;
900     }
901 
902     //
ctGenBlockCtContext903     CtState ctGenBlock(const(Bytecode)[] ir, int addr)
904     {
905         CtState result;
906         result.addr = addr;
907         while (!ir.empty)
908         {
909             auto n = ctGenGroup(ir, result.addr);
910             result.code ~= n.code;
911             result.addr = n.addr;
912         }
913         return result;
914     }
915 
916     //
ctGenGroupCtContext917     CtState ctGenGroup(ref const(Bytecode)[] ir, int addr)
918     {
919         import std.algorithm.comparison : max;
920         auto bailOut = "goto L_backtrack;";
921         auto nextInstr = ctSub("goto case $$;", addr+1);
922         CtState r;
923         assert(!ir.empty);
924         switch (ir[0].code)
925         {
926         case IR.InfiniteStart,  IR.InfiniteBloomStart,IR.InfiniteQStart, IR.RepeatStart, IR.RepeatQStart:
927             immutable infLoop =
928                 ir[0].code == IR.InfiniteStart || ir[0].code == IR.InfiniteQStart ||
929                 ir[0].code == IR.InfiniteBloomStart;
930 
931             counter = counter ||
932                 ir[0].code == IR.RepeatStart || ir[0].code == IR.RepeatQStart;
933             immutable len = ir[0].data;
934             auto nir = ir[ir[0].length .. ir[0].length+len];
935             r = ctGenBlock(nir, addr+1);
936             //start/end codegen
937             //r.addr is at last test+ jump of loop, addr+1 is body of loop
938             nir = ir[ir[0].length + len .. $];
939             r.code = ctGenFixupCode(ir[0 .. ir[0].length], addr, r.addr) ~ r.code;
940             r.code ~= ctGenFixupCode(nir, r.addr, addr+1);
941             r.addr += 2;   //account end instruction + restore state
942             ir = nir;
943             break;
944         case IR.OrStart:
945             immutable len = ir[0].data;
946             auto nir = ir[ir[0].length .. ir[0].length+len];
947             r = ctGenAlternation(nir, addr);
948             ir = ir[ir[0].length + len .. $];
949             assert(ir[0].code == IR.OrEnd);
950             ir = ir[ir[0].length..$];
951             break;
952         case IR.LookaheadStart:
953         case IR.NeglookaheadStart:
954         case IR.LookbehindStart:
955         case IR.NeglookbehindStart:
956             immutable len = ir[0].data;
957             immutable behind = ir[0].code == IR.LookbehindStart || ir[0].code == IR.NeglookbehindStart;
958             immutable negative = ir[0].code == IR.NeglookaheadStart || ir[0].code == IR.NeglookbehindStart;
959             string fwdType = "typeof(fwdMatcher(re, []))";
960             string bwdType = "typeof(bwdMatcher(re, []))";
961             string fwdCreate = "fwdMatcher(re, mem)";
962             string bwdCreate = "bwdMatcher(re, mem)";
963             immutable start = IRL!(IR.LookbehindStart);
964             immutable end = IRL!(IR.LookbehindStart)+len+IRL!(IR.LookaheadEnd);
965             CtContext context = lookaround(ir[1].raw, ir[2].raw); //split off new context
966             auto slice = ir[start .. end];
967             r.code ~= ctSub(`
968             case $$: //fake lookaround "atom"
969                     static if (typeof(matcher.s).isLoopback)
970                         alias Lookaround = $$;
971                     else
972                         alias Lookaround = $$;
973                     static bool matcher_$$(Lookaround matcher) @trusted
974                     {
975                         //(neg)lookaround piece start
976                         $$
977                         //(neg)lookaround piece ends
978                     }
979                     auto save = index;
980                     auto mem = pureMalloc(initialMemory(re))[0 .. initialMemory(re)];
981                     scope(exit) pureFree(mem.ptr);
982                     static if (typeof(matcher.s).isLoopback)
983                         auto lookaround = $$;
984                     else
985                         auto lookaround = $$;
986                     lookaround.matches = matches[$$..$$];
987                     lookaround.backrefed = backrefed.empty ? matches : backrefed;
988                     lookaround.nativeFn = &matcher_$$; //hookup closure's binary code
989                     int match = $$;
990                     s.reset(save);
991                     next();
992                     if (match)
993                         $$
994                     else
995                         $$`, addr,
996                         behind ? fwdType : bwdType, behind ? bwdType : fwdType,
997                         addr, context.ctGenRegEx(slice),
998                         behind ? fwdCreate : bwdCreate, behind ? bwdCreate : fwdCreate,
999                         ir[1].raw, ir[2].raw, //start - end of matches slice
1000                         addr,
1001                         negative ? "!lookaround.matchImpl()" : "lookaround.matchImpl()",
1002                         nextInstr, bailOut);
1003             ir = ir[end .. $];
1004             r.addr = addr + 1;
1005             break;
1006         case IR.LookaheadEnd: case IR.NeglookaheadEnd:
1007         case IR.LookbehindEnd: case IR.NeglookbehindEnd:
1008             ir = ir[IRL!(IR.LookaheadEnd) .. $];
1009             r.addr = addr;
1010             break;
1011         default:
1012             assert(ir[0].isAtom,  text(ir[0].mnemonic));
1013             r = ctGenAtom(ir, addr);
1014         }
1015         return r;
1016     }
1017 
1018     //generate source for bytecode contained  in OrStart ... OrEnd
ctGenAlternationCtContext1019     CtState ctGenAlternation(const(Bytecode)[] ir, int addr)
1020     {
1021         CtState[] pieces;
1022         CtState r;
1023         enum optL = IRL!(IR.Option);
1024         for (;;)
1025         {
1026             assert(ir[0].code == IR.Option);
1027             auto len = ir[0].data;
1028             if (optL+len < ir.length  && ir[optL+len].code == IR.Option)//not a last option
1029             {
1030                 auto nir = ir[optL .. optL+len-IRL!(IR.GotoEndOr)];
1031                 r = ctGenBlock(nir, addr+2);//space for Option + restore state
1032                 //r.addr+1 to account GotoEndOr  at end of branch
1033                 r.code = ctGenFixupCode(ir[0 .. ir[0].length], addr, r.addr+1) ~ r.code;
1034                 addr = r.addr+1;//leave space for GotoEndOr
1035                 pieces ~= r;
1036                 ir = ir[optL + len .. $];
1037             }
1038             else
1039             {
1040                 pieces ~= ctGenBlock(ir[optL..$], addr);
1041                 addr = pieces[$-1].addr;
1042                 break;
1043             }
1044         }
1045         r = pieces[0];
1046         for (uint i = 1; i < pieces.length; i++)
1047         {
1048             r.code ~= ctSub(`
1049                 case $$:
1050                     goto case $$; `, pieces[i-1].addr, addr);
1051             r.code ~= pieces[i].code;
1052         }
1053         r.addr = addr;
1054         return r;
1055     }
1056 
1057     // generate fixup code for instruction in ir,
1058     // fixup means it has an alternative way for control flow
ctGenFixupCodeCtContext1059     string ctGenFixupCode(const(Bytecode)[] ir, int addr, int fixup)
1060     {
1061         return ctGenFixupCode(ir, addr, fixup); // call ref Bytecode[] version
1062     }
ctGenFixupCodeCtContext1063     string ctGenFixupCode(ref const(Bytecode)[] ir, int addr, int fixup)
1064     {
1065         string r;
1066         string testCode;
1067         r = ctSub(`
1068                 case $$: debug(std_regex_matcher) writeln("#$$");`,
1069                     addr, addr);
1070         switch (ir[0].code)
1071         {
1072         case IR.InfiniteStart, IR.InfiniteQStart, IR.InfiniteBloomStart:
1073             r ~= ctSub( `
1074                     goto case $$;`, fixup);
1075             ir = ir[ir[0].length..$];
1076             break;
1077         case IR.InfiniteEnd:
1078             testCode = ctQuickTest(ir[IRL!(IR.InfiniteEnd) .. $],addr + 1);
1079             r ~= ctSub( `
1080                     if (merge[$$+counter].mark(index))
1081                     {
1082                         // merged!
1083                         goto L_backtrack;
1084                     }
1085 
1086                     $$
1087                     {
1088                         $$
1089                     }
1090                     goto case $$;
1091                 case $$: //restore state and go out of loop
1092                     $$
1093                     goto case;`, ir[1].raw, testCode, saveCode(addr+1), fixup,
1094                     addr+1, restoreCode());
1095             ir = ir[ir[0].length..$];
1096             break;
1097         case IR.InfiniteBloomEnd:
1098             //TODO: check bloom filter and skip on failure
1099             testCode = ctQuickTest(ir[IRL!(IR.InfiniteBloomEnd) .. $],addr + 1);
1100             r ~= ctSub( `
1101                     if (merge[$$+counter].mark(index))
1102                     {
1103                         // merged!
1104                         goto L_backtrack;
1105                     }
1106 
1107                     $$
1108                     {
1109                         $$
1110                     }
1111                     goto case $$;
1112                 case $$: //restore state and go out of loop
1113                     $$
1114                     goto case;`, ir[1].raw, testCode, saveCode(addr+1), fixup,
1115                     addr+1, restoreCode());
1116             ir = ir[ir[0].length..$];
1117             break;
1118         case IR.InfiniteQEnd:
1119             testCode = ctQuickTest(ir[IRL!(IR.InfiniteEnd) .. $],addr + 1);
1120             auto altCode = testCode.length ? ctSub("else goto case $$;", fixup) : "";
1121             r ~= ctSub( `
1122                     if (merge[$$+counter].mark(index))
1123                     {
1124                         // merged!
1125                         goto L_backtrack;
1126                     }
1127 
1128                     $$
1129                     {
1130                         $$
1131                         goto case $$;
1132                     }
1133                     $$
1134                 case $$://restore state and go inside loop
1135                     $$
1136                     goto case $$;`, ir[1].raw,
1137                     testCode, saveCode(addr+1), addr+2, altCode,
1138                     addr+1, restoreCode(), fixup);
1139             ir = ir[ir[0].length..$];
1140             break;
1141         case IR.RepeatStart, IR.RepeatQStart:
1142             r ~= ctSub( `
1143                     goto case $$;`, fixup);
1144             ir = ir[ir[0].length..$];
1145             break;
1146          case IR.RepeatEnd, IR.RepeatQEnd:
1147             //len, step, min, max
1148             immutable len = ir[0].data;
1149             immutable step = ir[2].raw;
1150             immutable min = ir[3].raw;
1151             immutable max = ir[4].raw;
1152             r ~= ctSub(`
1153                     if (merge[$$+counter].mark(index))
1154                     {
1155                         // merged!
1156                         goto L_backtrack;
1157                     }
1158                     if (counter < $$)
1159                     {
1160                         debug(std_regex_matcher) writeln("RepeatEnd min case pc=", $$);
1161                         counter += $$;
1162                         goto case $$;
1163                     }`,  ir[1].raw, min, addr, step, fixup);
1164             if (ir[0].code == IR.RepeatEnd)
1165             {
1166                 string counter_expr = ctSub("counter % $$", step);
1167                 r ~= ctSub(`
1168                     else if (counter < $$)
1169                     {
1170                             $$
1171                             counter += $$;
1172                             goto case $$;
1173                     }`, max, saveCode(addr+1, counter_expr), step, fixup);
1174             }
1175             else
1176             {
1177                 string counter_expr = ctSub("counter % $$", step);
1178                 r ~= ctSub(`
1179                     else if (counter < $$)
1180                     {
1181                         $$
1182                         counter = counter % $$;
1183                         goto case $$;
1184                     }`, max, saveCode(addr+1,counter_expr), step, addr+2);
1185             }
1186             r ~= ctSub(`
1187                     else
1188                     {
1189                         counter = counter % $$;
1190                         goto case $$;
1191                     }
1192                 case $$: //restore state
1193                     $$
1194                     goto case $$;`, step, addr+2, addr+1, restoreCode(),
1195                     ir[0].code == IR.RepeatEnd ? addr+2 : fixup );
1196             ir = ir[ir[0].length..$];
1197             break;
1198         case IR.Option:
1199             r ~= ctSub( `
1200                 {
1201                     $$
1202                 }
1203                 goto case $$;
1204             case $$://restore thunk to go to the next group
1205                 $$
1206                 goto case $$;`, saveCode(addr+1), addr+2,
1207                     addr+1, restoreCode(), fixup);
1208                 ir = ir[ir[0].length..$];
1209             break;
1210         default:
1211             assert(0, text(ir[0].mnemonic));
1212         }
1213         return r;
1214     }
1215 
1216 
ctQuickTestCtContext1217     string ctQuickTest(const(Bytecode)[] ir, int id)
1218     {
1219         uint pc = 0;
1220         while (pc < ir.length && ir[pc].isAtom)
1221         {
1222             if (ir[pc].code == IR.GroupStart || ir[pc].code == IR.GroupEnd)
1223             {
1224                 pc++;
1225             }
1226             else if (ir[pc].code == IR.Backref)
1227                 break;
1228             else
1229             {
1230                 auto code = ctAtomCode(ir[pc..$], -1);
1231                 return ctSub(`
1232                     int test_$$()
1233                     {
1234                         $$ //$$
1235                     }
1236                     if (test_$$() >= 0)`, id, code.ptr ? code : "return 0;",
1237                         ir[pc].mnemonic, id);
1238             }
1239         }
1240         return "";
1241     }
1242 
1243     //process & generate source for simple bytecodes at front of ir using address addr
ctGenAtomCtContext1244     CtState ctGenAtom(ref const(Bytecode)[] ir, int addr)
1245     {
1246         CtState result;
1247         result.code = ctAtomCode(ir, addr);
1248         ir.popFrontN(ir[0].code == IR.OrChar ? ir[0].sequence : ir[0].length);
1249         result.addr = addr + 1;
1250         return result;
1251     }
1252 
1253     //D code for atom at ir using address addr, addr < 0 means quickTest
ctAtomCodeCtContext1254     string ctAtomCode(const(Bytecode)[] ir, int addr)
1255     {
1256         string code;
1257         string bailOut, nextInstr;
1258         if (addr < 0)
1259         {
1260             bailOut = "return -1;";
1261             nextInstr = "return 0;";
1262         }
1263         else
1264         {
1265             bailOut = "goto L_backtrack;";
1266             nextInstr = ctSub("goto case $$;", addr+1);
1267             code ~=  ctSub( `
1268                  case $$: debug(std_regex_matcher) writeln("#$$");
1269                     `, addr, addr);
1270         }
1271         switch (ir[0].code)
1272         {
1273         case IR.OrChar://assumes IRL!(OrChar) == 1
1274             code ~=  ctSub(`
1275                     if (atEnd)
1276                         $$`, bailOut);
1277             immutable len = ir[0].sequence;
1278             for (uint i = 0; i < len; i++)
1279             {
1280                 code ~= ctSub( `
1281                     if (front == $$)
1282                     {
1283                         $$
1284                         $$
1285                     }`,   ir[i].data, addr >= 0 ? "next();" :"", nextInstr);
1286             }
1287             code ~= ctSub( `
1288                 $$`, bailOut);
1289             break;
1290         case IR.Char:
1291             code ~= ctSub( `
1292                     if (atEnd || front != $$)
1293                         $$
1294                     $$
1295                     $$`, ir[0].data, bailOut, addr >= 0 ? "next();" :"", nextInstr);
1296             break;
1297         case IR.Any:
1298             code ~= ctSub( `
1299                     if (atEnd || (!(re.flags & RegexOption.singleline)
1300                                 && (front == '\r' || front == '\n')))
1301                         $$
1302                     $$
1303                     $$`, bailOut, addr >= 0 ? "next();" :"",nextInstr);
1304             break;
1305         case IR.CodepointSet:
1306             if (charsets.length)
1307             {
1308                 string name = `func_`~to!string(addr+1);
1309                 string funcCode = CodepointSet.toSourceCode(charsets[ir[0].data], name);
1310                 code ~= ctSub( `
1311                     static $$
1312                     if (atEnd || !$$(front))
1313                         $$
1314                     $$
1315                 $$`, funcCode, name, bailOut, addr >= 0 ? "next();" :"", nextInstr);
1316             }
1317             else
1318                 code ~= ctSub( `
1319                     if (atEnd || !re.charsets[$$].scanFor(front))
1320                         $$
1321                     $$
1322                 $$`, ir[0].data, bailOut, addr >= 0 ? "next();" :"", nextInstr);
1323             break;
1324         case IR.Trie:
1325             if (charsets.length && charsets[ir[0].data].length  <= 8)
1326                 goto case IR.CodepointSet;
1327             code ~= ctSub( `
1328                     if (atEnd || !re.matchers[$$][front])
1329                         $$
1330                     $$
1331                 $$`, ir[0].data, bailOut, addr >= 0 ? "next();" :"", nextInstr);
1332             break;
1333         case IR.Wordboundary:
1334             code ~= ctSub( `
1335                     dchar back;
1336                     DataIndex bi;
1337                     if (atStart && wordMatcher[front])
1338                     {
1339                         $$
1340                     }
1341                     else if (atEnd && s.loopBack(index).nextChar(back, bi)
1342                             && wordMatcher[back])
1343                     {
1344                         $$
1345                     }
1346                     else if (s.loopBack(index).nextChar(back, bi))
1347                     {
1348                         bool af = wordMatcher[front];
1349                         bool ab = wordMatcher[back];
1350                         if (af ^ ab)
1351                         {
1352                             $$
1353                         }
1354                     }
1355                     $$`, nextInstr, nextInstr, nextInstr, bailOut);
1356             break;
1357         case IR.Notwordboundary:
1358             code ~= ctSub( `
1359                     dchar back;
1360                     DataIndex bi;
1361                     //at start & end of input
1362                     if (atStart && wordMatcher[front])
1363                         $$
1364                     else if (atEnd && s.loopBack(index).nextChar(back, bi)
1365                             && wordMatcher[back])
1366                         $$
1367                     else if (s.loopBack(index).nextChar(back, bi))
1368                     {
1369                         bool af = wordMatcher[front];
1370                         bool ab = wordMatcher[back];
1371                         if (af ^ ab)
1372                             $$
1373                     }
1374                     $$`, bailOut, bailOut, bailOut, nextInstr);
1375 
1376             break;
1377         case IR.Bol:
1378             code ~= ctSub(`
1379                     dchar back;
1380                     DataIndex bi;
1381                     if (atStart || (s.loopBack(index).nextChar(back,bi)
1382                         && endOfLine(back, front == '\n')))
1383                     {
1384                         debug(std_regex_matcher) writeln("BOL matched");
1385                         $$
1386                     }
1387                     else
1388                         $$`, nextInstr, bailOut);
1389 
1390             break;
1391         case IR.Bof:
1392             code ~= ctSub(`
1393                     if (atStart)
1394                     {
1395                         debug(std_regex_matcher) writeln("BOF matched");
1396                         $$
1397                     }
1398                     else
1399                         $$`, nextInstr, bailOut);
1400             break;
1401         case IR.Eol:
1402             code ~= ctSub(`
1403                     dchar back;
1404                     DataIndex bi;
1405                     debug(std_regex_matcher) writefln("EOL (front 0x%x) %s", front, s[index .. s.lastIndex]);
1406                     //no matching inside \r\n
1407                     if (atEnd || (endOfLine(front, s.loopBack(index).nextChar(back,bi)
1408                              && back == '\r')))
1409                     {
1410                         debug(std_regex_matcher) writeln("EOL matched");
1411                         $$
1412                     }
1413                     else
1414                         $$`, nextInstr, bailOut);
1415             break;
1416         case IR.Eof:
1417             code ~= ctSub(`
1418                     if (atEnd)
1419                     {
1420                         debug(std_regex_matcher) writeln("BOF matched");
1421                         $$
1422                     }
1423                     else
1424                         $$`, nextInstr, bailOut);
1425             break;
1426         case IR.GroupStart:
1427             code ~= ctSub(`
1428                     matches[$$].begin = index;
1429                     $$`, ir[0].data, nextInstr);
1430             match = ir[0].data+1;
1431             break;
1432         case IR.GroupEnd:
1433             code ~= ctSub(`
1434                     matches[$$].end = index;
1435                     $$`, ir[0].data, nextInstr);
1436             break;
1437         case IR.Backref:
1438             string mStr = "auto referenced = ";
1439             mStr ~= ir[0].localRef
1440                 ? ctSub("s[matches[$$].begin .. matches[$$].end];",
1441                     ir[0].data, ir[0].data)
1442                 : ctSub("s[backrefed[$$].begin .. backrefed[$$].end];",
1443                     ir[0].data, ir[0].data);
1444             code ~= ctSub( `
1445                     $$
1446                     while (!atEnd && !referenced.empty && front == referenced.front)
1447                     {
1448                         next();
1449                         referenced.popFront();
1450                     }
1451                     if (referenced.empty)
1452                         $$
1453                     else
1454                         $$`, mStr, nextInstr, bailOut);
1455             break;
1456         case IR.Nop:
1457         case IR.End:
1458             break;
1459         default:
1460             assert(0, text(ir[0].mnemonic, " is not supported yet"));
1461         }
1462         return code;
1463     }
1464 
1465     //generate D code for the whole regex
ctGenRegExCtContext1466     public string ctGenRegEx(const(Bytecode)[] ir)
1467     {
1468         auto bdy = ctGenBlock(ir, 0);
1469         auto r = `
1470             import core.memory : pureMalloc, pureFree;
1471             with(matcher)
1472             {
1473             pc = 0;
1474             counter = 0;
1475             lastState = 0;
1476             matches[] = Group!DataIndex.init;
1477             auto start = s._index;`;
1478         r ~= `
1479             goto StartLoop;
1480             debug(std_regex_matcher) writeln("Try CT matching  starting at ",s[index .. s.lastIndex]);
1481         L_backtrack:
1482             if (lastState || prevStack())
1483             {
1484                 stackPop(pc);
1485                 stackPop(index);
1486                 s.reset(index);
1487                 next();
1488             }
1489             else
1490             {
1491                 s.reset(start);
1492                 return false;
1493             }
1494         StartLoop:
1495             switch (pc)
1496             {
1497         `;
1498         r ~= bdy.code;
1499         r ~= ctSub(`
1500                 case $$: break;`,bdy.addr);
1501         r ~= `
1502             default:
1503                 assert(0);
1504             }
1505             // cleanup stale stack blocks
1506             while (prevStack()) {}
1507             return true;
1508             }
1509         `;
1510         return r;
1511     }
1512 
1513 }
1514 
ctGenRegExCode(Char)1515 string ctGenRegExCode(Char)(const Regex!Char re)
1516 {
1517     auto context = CtContext(re);
1518     return context.ctGenRegEx(re.ir);
1519 }
1520