1 /*
2  * Copyright (c) 2015-2017, Intel Corporation
3  *
4  * Redistribution and use in source and binary forms, with or without
5  * modification, are permitted provided that the following conditions are met:
6  *
7  *  * Redistributions of source code must retain the above copyright notice,
8  *    this list of conditions and the following disclaimer.
9  *  * Redistributions in binary form must reproduce the above copyright
10  *    notice, this list of conditions and the following disclaimer in the
11  *    documentation and/or other materials provided with the distribution.
12  *  * Neither the name of Intel Corporation nor the names of its contributors
13  *    may be used to endorse or promote products derived from this software
14  *    without specific prior written permission.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
20  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26  * POSSIBILITY OF SUCH DAMAGE.
27  */
28 
29 /** \file
30  * \brief Resolve special assert vertices.
31  *
32  * The assert resolution algorithm proceeds by iterating over those edges with
33  * assertion flags, considering source and target vertices of each edge. If a
34  * vertex has a superset of the reachability demanded by the assertion on the
35  * edge, it is split into alternatives providing the word and non-word paths
36  * through that vertex.
37  *
38  * A great deal of the complexity in the resolveAsserts pass is devoted to
39  * handling these assertions when the UCP flag is specified (meaning \\w and \\W
40  * are implemented with Unicode properties, rather than their ASCII
41  * interpretation) and the prefiltering flag is also used. Complete,
42  * non-prefiltering UCP support is not available yet.
43  */
44 #include "ng_asserts.h"
45 
46 #include "ng.h"
47 #include "ng_prune.h"
48 #include "ng_redundancy.h"
49 #include "ng_util.h"
50 #include "compiler/compiler.h"
51 #include "parser/position.h" // for POS flags
52 #include "util/bitutils.h" // for findAndClearLSB_32
53 #include "util/boundary_reports.h"
54 #include "util/container.h"
55 #include "util/compile_context.h"
56 #include "util/compile_error.h"
57 #include "util/graph_range.h"
58 #include "util/report_manager.h"
59 #include "util/unicode_def.h"
60 
61 #include <queue>
62 
63 using namespace std;
64 
65 namespace ue2 {
66 
67 /** \brief Hard limit on the maximum number of vertices we'll clone before we
68  * throw up our hands and report 'Pattern too large.' */
69 static const size_t MAX_CLONED_VERTICES = 2048;
70 
71 /** \brief The definition of \\w, since we use it everywhere in here. */
72 static const CharReach CHARREACH_WORD(CharReach('a', 'z') |
73         CharReach('A', 'Z') | CharReach('0', '9') | CharReach('_'));
74 
75 /** \brief \\W is the inverse of \\w */
76 static const CharReach CHARREACH_NONWORD(~CHARREACH_WORD);
77 
78 /** \brief Prefiltering definition of \\w for UCP mode.
79  *
80  * Includes all high bytes as to capture all non-ASCII, however depending on
81  * direction only continuers or starters are strictly required - as the input
82  * is well-formed, this laxness will not cost us. */
83 static const CharReach CHARREACH_WORD_UCP_PRE(CHARREACH_WORD
84                                               | CharReach(128, 255));
85 
86 /** \brief Prefiltering definition of \\W for UCP Mode.
87  *
88  * (non-word already includes high bytes) */
89 static const CharReach CHARREACH_NONWORD_UCP_PRE(CHARREACH_NONWORD);
90 
91 /** \brief Find all the edges with assertion flags. */
92 static
getAsserts(const NGHolder & g)93 vector<NFAEdge> getAsserts(const NGHolder &g) {
94     vector<NFAEdge> out;
95     for (const auto &e : edges_range(g)) {
96         if (g[e].assert_flags) {
97             out.push_back(e);
98         }
99     }
100     return out;
101 }
102 
103 static
addToSplit(const NGHolder & g,NFAVertex v,map<u32,NFAVertex> * to_split)104 void addToSplit(const NGHolder &g, NFAVertex v, map<u32, NFAVertex> *to_split) {
105     DEBUG_PRINTF("%zu needs splitting\n", g[v].index);
106     to_split->emplace(g[v].index, v);
107 }
108 
109 /** \brief Find vertices that need to be split due to an assertion edge.
110  *
111  * A vertex needs to be split if has an edge to/from it with an assert with a
112  * restriction on the relevant end. */
113 static
findSplitters(const NGHolder & g,const vector<NFAEdge> & asserts,map<u32,NFAVertex> * to_split,map<u32,NFAVertex> * to_split_ucp)114 void findSplitters(const NGHolder &g, const vector<NFAEdge> &asserts,
115                    map<u32, NFAVertex> *to_split,
116                    map<u32, NFAVertex> *to_split_ucp) {
117     for (const auto &e : asserts) {
118         NFAVertex u = source(e, g);
119         NFAVertex v = target(e, g);
120         u32 flags = g[e].assert_flags;
121         assert(flags);
122 
123         const CharReach &u_cr = g[u].char_reach;
124         const CharReach &v_cr = g[v].char_reach;
125 
126         bool ucp_assert = flags & UCP_ASSERT_FLAGS;
127         bool normal_assert = flags & NON_UCP_ASSERT_FLAGS;
128         /* In reality, an expression can only be entirely ucp or not ucp */
129         assert(ucp_assert != normal_assert);
130 
131         if (normal_assert) {
132             /* assume any flag results in us have to split if the vertex is not
133              * a subset of word or completely disjoint from it. We could be more
134              * nuanced if flags is a disjunction of multiple assertions. */
135             if (!u_cr.isSubsetOf(CHARREACH_WORD)
136                 && !u_cr.isSubsetOf(CHARREACH_NONWORD)
137                 && u != g.start) { /* start is always considered a nonword */
138                 addToSplit(g, u, to_split);
139             }
140 
141             if (!v_cr.isSubsetOf(CHARREACH_WORD)
142                 && !v_cr.isSubsetOf(CHARREACH_NONWORD)
143                 && v != g.accept /* accept require special handling, done on a
144                                   * per edge basis in resolve asserts
145                                   */
146                 && v != g.acceptEod) { /* eod is always considered a nonword */
147                 addToSplit(g, v, to_split);
148             }
149         }
150 
151         if (ucp_assert) {
152             /* note: the ucp prefilter crs overlap - requires a bit more care */
153             if (u == g.start) { /* start never needs to be split,
154                                  * treat nonword */
155             } else if (flags & POS_FLAG_ASSERT_WORD_TO_ANY_UCP) {
156                 if (!u_cr.isSubsetOf(CHARREACH_WORD_UCP_PRE)
157                     && !u_cr.isSubsetOf(~CHARREACH_WORD_UCP_PRE)) {
158                     addToSplit(g, u, to_split_ucp);
159                 }
160             } else {
161                 assert(flags & POS_FLAG_ASSERT_NONWORD_TO_ANY_UCP);
162                 if (!u_cr.isSubsetOf(CHARREACH_NONWORD_UCP_PRE)
163                     && !u_cr.isSubsetOf(~CHARREACH_NONWORD_UCP_PRE)) {
164                     addToSplit(g, u, to_split_ucp);
165                 }
166             }
167 
168             if (v == g.acceptEod /* eod is always considered a nonword */
169                 || v == g.accept) {  /* accept require special handling, done on
170                                       * a per edge basis in resolve asserts */
171             } else if (flags & POS_FLAG_ASSERT_ANY_TO_WORD_UCP) {
172                 if (!v_cr.isSubsetOf(CHARREACH_WORD_UCP_PRE)
173                     && !v_cr.isSubsetOf(~CHARREACH_WORD_UCP_PRE)) {
174                     addToSplit(g, v, to_split_ucp);
175                 }
176             } else {
177                 assert(flags & POS_FLAG_ASSERT_ANY_TO_NONWORD_UCP);
178                 if (!v_cr.isSubsetOf(CHARREACH_NONWORD_UCP_PRE)
179                     && !v_cr.isSubsetOf(~CHARREACH_NONWORD_UCP_PRE)) {
180                     addToSplit(g, v, to_split_ucp);
181                 }
182             }
183         }
184     }
185 }
186 
187 static
setReportId(ReportManager & rm,NGHolder & g,const ExpressionInfo & expr,NFAVertex v,s32 adj)188 void setReportId(ReportManager &rm, NGHolder &g, const ExpressionInfo &expr,
189                  NFAVertex v, s32 adj) {
190     // Don't try and set the report ID of a special vertex.
191     assert(!is_special(v, g));
192 
193     // If there's a report set already, we're replacing it.
194     g[v].reports.clear();
195 
196     Report ir = rm.getBasicInternalReport(expr, adj);
197 
198     g[v].reports.insert(rm.getInternalId(ir));
199     DEBUG_PRINTF("set report id for vertex %zu, adj %d\n", g[v].index, adj);
200 }
201 
202 static
makeClone(ReportManager & rm,NGHolder & g,const ExpressionInfo & expr,NFAVertex v,const CharReach & cr_mask)203 NFAVertex makeClone(ReportManager &rm, NGHolder &g, const ExpressionInfo &expr,
204                     NFAVertex v, const CharReach &cr_mask) {
205     NFAVertex clone = clone_vertex(g, v);
206     g[clone].char_reach &= cr_mask;
207     clone_out_edges(g, v, clone);
208     clone_in_edges(g, v, clone);
209 
210     if (v == g.startDs) {
211         if (expr.utf8) {
212             g[clone].char_reach &= ~UTF_START_CR;
213         }
214 
215         DEBUG_PRINTF("marked as virt\n");
216         g[clone].assert_flags = POS_FLAG_VIRTUAL_START;
217 
218         setReportId(rm, g, expr, clone, 0);
219     }
220 
221     return clone;
222 }
223 
224 static
splitVertex(ReportManager & rm,NGHolder & g,const ExpressionInfo & expr,NFAVertex v,bool ucp)225 void splitVertex(ReportManager &rm, NGHolder &g, const ExpressionInfo &expr,
226                  NFAVertex v, bool ucp) {
227     assert(v != g.start);
228     assert(v != g.accept);
229     assert(v != g.acceptEod);
230     DEBUG_PRINTF("partitioning vertex %zu ucp:%d\n", g[v].index, (int)ucp);
231 
232     CharReach cr_word = ucp ? CHARREACH_WORD_UCP_PRE : CHARREACH_WORD;
233     CharReach cr_nonword = ucp ? CHARREACH_NONWORD_UCP_PRE : CHARREACH_NONWORD;
234 
235     auto has_no_assert = [&g](const NFAEdge &e) { return !g[e].assert_flags; };
236 
237     // Split v into word/nonword vertices with only asserting out-edges.
238     NFAVertex w_out = makeClone(rm, g, expr, v, cr_word);
239     NFAVertex nw_out = makeClone(rm, g, expr, v, cr_nonword);
240     remove_out_edge_if(w_out, has_no_assert, g);
241     remove_out_edge_if(nw_out, has_no_assert, g);
242 
243     // Split v into word/nonword vertices with only asserting in-edges.
244     NFAVertex w_in = makeClone(rm, g, expr, v, cr_word);
245     NFAVertex nw_in = makeClone(rm, g, expr, v, cr_nonword);
246     remove_in_edge_if(w_in, has_no_assert, g);
247     remove_in_edge_if(nw_in, has_no_assert, g);
248 
249     // Prune edges with asserts from original v.
250     auto has_assert = [&g](const NFAEdge &e) { return g[e].assert_flags; };
251     remove_in_edge_if(v, has_assert, g);
252     remove_out_edge_if(v, has_assert, g);
253 }
254 
255 static
resolveEdges(ReportManager & rm,NGHolder & g,const ExpressionInfo & expr,set<NFAEdge> * dead)256 void resolveEdges(ReportManager &rm, NGHolder &g, const ExpressionInfo &expr,
257                   set<NFAEdge> *dead) {
258     for (const auto &e : edges_range(g)) {
259         u32 flags = g[e].assert_flags;
260         if (!flags) {
261             continue;
262         }
263 
264         NFAVertex u = source(e, g);
265         NFAVertex v = target(e, g);
266 
267         assert(u != g.startDs);
268 
269         const CharReach &u_cr = g[u].char_reach;
270         const CharReach &v_cr = g[v].char_reach;
271 
272         bool impassable = true;
273         bool ucp = flags & UCP_ASSERT_FLAGS;
274         DEBUG_PRINTF("resolving edge %zu->%zu (flags=0x%x, ucp=%d)\n",
275                      g[u].index, g[v].index, flags, (int)ucp);
276         while (flags && impassable) {
277             u32 flag = 1U << findAndClearLSB_32(&flags);
278             switch (flag) {
279             case POS_FLAG_ASSERT_NONWORD_TO_NONWORD:
280             case POS_FLAG_ASSERT_NONWORD_TO_WORD:
281                 if ((u_cr & CHARREACH_NONWORD).none() && u != g.start) {
282                     continue;
283                 }
284                 break;
285             case POS_FLAG_ASSERT_WORD_TO_NONWORD:
286             case POS_FLAG_ASSERT_WORD_TO_WORD:
287                 if ((u_cr & CHARREACH_WORD).none() || u == g.start) {
288                     continue;
289                 }
290                 break;
291             case POS_FLAG_ASSERT_NONWORD_TO_NONWORD_UCP:
292             case POS_FLAG_ASSERT_NONWORD_TO_WORD_UCP:
293                 if ((u_cr & ~CHARREACH_NONWORD_UCP_PRE).any() && u != g.start) {
294                     continue;
295                 }
296                 break;
297             case POS_FLAG_ASSERT_WORD_TO_NONWORD_UCP:
298             case POS_FLAG_ASSERT_WORD_TO_WORD_UCP:
299                 if ((u_cr & ~CHARREACH_WORD_UCP_PRE).any() || u == g.start) {
300                     continue;
301                 }
302                 break;
303             default:
304                 assert(0);
305             }
306 
307             if (v == g.accept) {
308                 /* accept special will need to be treated specially later */
309                 impassable = false;
310                 continue;
311             }
312 
313             switch (flag) {
314             case POS_FLAG_ASSERT_NONWORD_TO_NONWORD:
315             case POS_FLAG_ASSERT_WORD_TO_NONWORD:
316                 if ((v_cr & CHARREACH_NONWORD).none() && v != g.acceptEod) {
317                     continue;
318                 }
319                 break;
320             case POS_FLAG_ASSERT_WORD_TO_WORD:
321             case POS_FLAG_ASSERT_NONWORD_TO_WORD:
322                 if ((v_cr & CHARREACH_WORD).none() || v == g.acceptEod) {
323                     continue;
324                 }
325                 break;
326             case POS_FLAG_ASSERT_NONWORD_TO_NONWORD_UCP:
327             case POS_FLAG_ASSERT_WORD_TO_NONWORD_UCP:
328                 if ((v_cr & ~CHARREACH_NONWORD_UCP_PRE).any()
329                     && v != g.acceptEod) {
330                     continue;
331                 }
332                 break;
333             case POS_FLAG_ASSERT_WORD_TO_WORD_UCP:
334             case POS_FLAG_ASSERT_NONWORD_TO_WORD_UCP:
335                 if ((v_cr & ~CHARREACH_WORD_UCP_PRE).any()
336                     || v == g.acceptEod) {
337                     continue;
338                 }
339                 break;
340             default:
341                 assert(0);
342             }
343             impassable = false;
344         }
345 
346         if (impassable) {
347             dead->insert(e);
348         } else if (v == g.accept && !ucp) {
349             bool u_w = (u_cr & CHARREACH_NONWORD).none() && u != g.start;
350             UNUSED bool u_nw = (u_cr & CHARREACH_WORD).none() || u == g.start;
351             assert(u_w != u_nw);
352             bool v_w = false;
353             bool v_nw = false;
354 
355             flags = g[e].assert_flags;
356             if (u_w) {
357                 v_w = flags & POS_FLAG_ASSERT_WORD_TO_WORD;
358                 v_nw = flags & POS_FLAG_ASSERT_WORD_TO_NONWORD;
359             } else {
360                 v_w = flags & POS_FLAG_ASSERT_NONWORD_TO_WORD;
361                 v_nw = flags & POS_FLAG_ASSERT_NONWORD_TO_NONWORD;
362             }
363             assert(v_w || v_nw);
364             if (v_w && v_nw) {
365                 /* edge is effectively unconditional */
366                 g[e].assert_flags = 0;
367             } else if (v_w) {
368                 /* need to add a word byte */
369                 NFAVertex vv = add_vertex(g);
370                 setReportId(rm, g, expr, vv, -1);
371                 g[vv].char_reach = CHARREACH_WORD;
372                 add_edge(vv, g.accept, g);
373                 g[e].assert_flags = 0;
374                 add_edge(u, vv, g[e], g);
375                 dead->insert(e);
376             } else {
377                 /* need to add a non word byte or see eod */
378                 NFAVertex vv = add_vertex(g);
379                 setReportId(rm, g, expr, vv, -1);
380                 g[vv].char_reach = CHARREACH_NONWORD;
381                 add_edge(vv, g.accept, g);
382                 g[e].assert_flags = 0;
383                 add_edge(u, vv, g[e], g);
384                 /* there may already be a different edge from start to eod if so
385                  * we need to make it unconditional and alive
386                  */
387                 if (NFAEdge start_eod = edge(u, g.acceptEod, g)) {
388                     g[start_eod].assert_flags = 0;
389                     dead->erase(start_eod);
390                 } else {
391                     add_edge(u, g.acceptEod, g[e], g);
392                 }
393                 dead->insert(e);
394             }
395         } else if (v == g.accept && ucp) {
396             DEBUG_PRINTF("resolving ucp assert to accept\n");
397             assert(u_cr.any());
398             bool u_w = (u_cr & CHARREACH_WORD_UCP_PRE).any()
399                        && u != g.start;
400             bool u_nw = (u_cr & CHARREACH_NONWORD_UCP_PRE).any()
401                        || u == g.start;
402             assert(u_w || u_nw);
403 
404             bool v_w = false;
405             bool v_nw = false;
406 
407             flags = g[e].assert_flags;
408             if (u_w) {
409                 v_w |= flags & POS_FLAG_ASSERT_WORD_TO_WORD_UCP;
410                 v_nw |= flags & POS_FLAG_ASSERT_WORD_TO_NONWORD_UCP;
411             }
412             if (u_nw) {
413                 v_w |= flags & POS_FLAG_ASSERT_NONWORD_TO_WORD_UCP;
414                 v_nw |= flags & POS_FLAG_ASSERT_NONWORD_TO_NONWORD_UCP;
415             }
416             assert(v_w || v_nw);
417             if (v_w && v_nw) {
418                 /* edge is effectively unconditional */
419                 g[e].assert_flags = 0;
420             } else if (v_w) {
421                 /* need to add a word byte */
422                 NFAVertex vv = add_vertex(g);
423                 setReportId(rm, g, expr, vv, -1);
424                 g[vv].char_reach = CHARREACH_WORD_UCP_PRE;
425                 add_edge(vv, g.accept, g);
426                 g[e].assert_flags = 0;
427                 add_edge(u, vv, g[e], g);
428                 dead->insert(e);
429             } else {
430                 /* need to add a non word byte or see eod */
431                 NFAVertex vv = add_vertex(g);
432                 setReportId(rm, g, expr, vv, -1);
433                 g[vv].char_reach = CHARREACH_NONWORD_UCP_PRE;
434                 add_edge(vv, g.accept, g);
435                 g[e].assert_flags = 0;
436                 add_edge(u, vv, g[e], g);
437                 /* there may already be a different edge from start to eod if so
438                  * we need to make it unconditional and alive
439                  */
440                 if (NFAEdge start_eod = edge(u, g.acceptEod, g)) {
441                     g[start_eod].assert_flags = 0;
442                     dead->erase(start_eod);
443                 } else {
444                     add_edge(u, g.acceptEod, g[e], g);
445                 }
446                 dead->insert(e);
447             }
448         } else {
449             /* we can remove the asserts as we have partitioned the vertices
450              * into w/nw around the assert edges
451              */
452             g[e].assert_flags = 0;
453         }
454     }
455 }
456 
resolveAsserts(ReportManager & rm,NGHolder & g,const ExpressionInfo & expr)457 void resolveAsserts(ReportManager &rm, NGHolder &g,
458                     const ExpressionInfo &expr) {
459     vector<NFAEdge> asserts = getAsserts(g);
460     if (asserts.empty()) {
461         return;
462     }
463 
464     map<u32, NFAVertex> to_split; /* by index, for determinism */
465     map<u32, NFAVertex> to_split_ucp; /* by index, for determinism */
466     findSplitters(g, asserts, &to_split, &to_split_ucp);
467     if (to_split.size() + to_split_ucp.size() > MAX_CLONED_VERTICES) {
468         throw CompileError(expr.index, "Pattern is too large.");
469     }
470 
471     for (const auto &m : to_split) {
472         assert(!contains(to_split_ucp, m.first));
473         splitVertex(rm, g, expr, m.second, false);
474     }
475 
476     for (const auto &m : to_split_ucp) {
477         splitVertex(rm, g, expr, m.second, true);
478     }
479 
480     set<NFAEdge> dead;
481     resolveEdges(rm, g, expr, &dead);
482 
483     remove_edges(dead, g);
484     renumber_vertices(g);
485     pruneUseless(g);
486     pruneEmptyVertices(g);
487 
488     renumber_vertices(g);
489     renumber_edges(g);
490     clearReports(g);
491 }
492 
ensureCodePointStart(ReportManager & rm,NGHolder & g,const ExpressionInfo & expr)493 void ensureCodePointStart(ReportManager &rm, NGHolder &g,
494                           const ExpressionInfo &expr) {
495     /* In utf8 mode there is an implicit assertion that we start at codepoint
496      * boundaries. Assert resolution handles the badness coming from asserts.
497      * The only other source of trouble is startDs->accept connections.
498      */
499     NFAEdge orig = edge(g.startDs, g.accept, g);
500     if (expr.utf8 && orig) {
501         DEBUG_PRINTF("rectifying %u\n", expr.report);
502         Report ir = rm.getBasicInternalReport(expr);
503         ReportID rep = rm.getInternalId(ir);
504 
505         NFAVertex v_a = add_vertex(g);
506         g[v_a].assert_flags = POS_FLAG_VIRTUAL_START;
507         g[v_a].char_reach = UTF_ASCII_CR;
508         add_edge(v_a, g.accept, g[orig], g);
509 
510         NFAVertex v_2 = add_vertex(g);
511         g[v_2].assert_flags = POS_FLAG_VIRTUAL_START;
512         g[v_2].char_reach = CharReach(UTF_TWO_BYTE_MIN, UTF_TWO_BYTE_MAX);
513 
514         NFAVertex v_3 = add_vertex(g);
515         g[v_3].assert_flags = POS_FLAG_VIRTUAL_START;
516         g[v_3].char_reach = CharReach(UTF_THREE_BYTE_MIN, UTF_THREE_BYTE_MAX);
517 
518         NFAVertex v_4 = add_vertex(g);
519         g[v_4].assert_flags = POS_FLAG_VIRTUAL_START;
520         g[v_4].char_reach = CharReach(UTF_FOUR_BYTE_MIN, UTF_FOUR_BYTE_MAX);
521 
522         NFAVertex v_c = add_vertex(g);
523         g[v_c].assert_flags = POS_FLAG_VIRTUAL_START;
524         g[v_c].char_reach = UTF_CONT_CR;
525         add_edge(v_c, g.accept, g[orig], g);
526 
527         add_edge(v_2, v_c, g);
528 
529         NFAVertex v_3c = add_vertex(g);
530         g[v_3c].assert_flags = POS_FLAG_VIRTUAL_START;
531         g[v_3c].char_reach = UTF_CONT_CR;
532         add_edge(v_3c, v_c, g);
533         add_edge(v_3, v_3c, g);
534 
535         NFAVertex v_4c = add_vertex(g);
536         g[v_4c].assert_flags = POS_FLAG_VIRTUAL_START;
537         g[v_4c].char_reach = UTF_CONT_CR;
538         add_edge(v_4c, v_3c, g);
539         add_edge(v_4, v_4c, g);
540 
541         g[v_a].reports.insert(rep);
542         g[v_c].reports.insert(rep);
543 
544         add_edge(g.start, v_a, g);
545         add_edge(g.startDs, v_a, g);
546         add_edge(g.start, v_2, g);
547         add_edge(g.startDs, v_2, g);
548         add_edge(g.start, v_3, g);
549         add_edge(g.startDs, v_3, g);
550         add_edge(g.start, v_4, g);
551         add_edge(g.startDs, v_4, g);
552         remove_edge(orig, g);
553         renumber_edges(g);
554         clearReports(g);
555     }
556 }
557 
558 } // namespace ue2
559