1 /*
2  * Copyright (c) 2015-2017, Intel Corporation
3  *
4  * Redistribution and use in source and binary forms, with or without
5  * modification, are permitted provided that the following conditions are met:
6  *
7  *  * Redistributions of source code must retain the above copyright notice,
8  *    this list of conditions and the following disclaimer.
9  *  * Redistributions in binary form must reproduce the above copyright
10  *    notice, this list of conditions and the following disclaimer in the
11  *    documentation and/or other materials provided with the distribution.
12  *  * Neither the name of Intel Corporation nor the names of its contributors
13  *    may be used to endorse or promote products derived from this software
14  *    without specific prior written permission.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
20  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26  * POSSIBILITY OF SUCH DAMAGE.
27  */
28 
29 /**
30  * \file
31  * \brief Propagate extended parameters to vertex reports and reduce graph if
32  * possible.
33  *
34  * This code handles the propagation of the extension parameters specified by
35  * the user with the \ref hs_expr_ext structure into the reports on the graph's
36  * vertices.
37  *
38  * There are also some analyses that prune edges that cannot contribute to a
39  * match given these constraints, or transform the graph in order to make a
40  * constraint implicit.
41  */
42 
43 #include "ng_extparam.h"
44 
45 #include "ng.h"
46 #include "ng_depth.h"
47 #include "ng_dump.h"
48 #include "ng_prune.h"
49 #include "ng_reports.h"
50 #include "ng_som_util.h"
51 #include "ng_width.h"
52 #include "ng_util.h"
53 #include "ue2common.h"
54 #include "compiler/compiler.h"
55 #include "parser/position.h"
56 #include "util/compile_context.h"
57 #include "util/compile_error.h"
58 #include "util/container.h"
59 #include "util/graph.h"
60 #include "util/graph_range.h"
61 
62 #include <sstream>
63 #include <string>
64 
65 using namespace std;
66 
67 namespace ue2 {
68 
69 static const u32 MAX_MAXOFFSET_TO_ANCHOR = 2000;
70 static const u32 MAX_MINLENGTH_TO_CONVERT = 2000;
71 
72 /** True if all the given reports have the same extparam bounds. */
73 template<typename Container>
hasSameBounds(const Container & reports,const ReportManager & rm)74 bool hasSameBounds(const Container &reports, const ReportManager &rm) {
75     assert(!reports.empty());
76 
77     const auto &first = rm.getReport(*reports.begin());
78     for (auto id : reports) {
79         const auto &report = rm.getReport(id);
80         if (report.minOffset != first.minOffset ||
81             report.maxOffset != first.maxOffset ||
82             report.minLength != first.minLength) {
83             return false;
84         }
85     }
86 
87     return true;
88 }
89 
90 /**
91  * \brief Find the (min, max) offset adjustment for the reports on a given
92  * vertex.
93  */
94 static
getMinMaxOffsetAdjust(const ReportManager & rm,const NGHolder & g,NFAVertex v)95 pair<s32,s32> getMinMaxOffsetAdjust(const ReportManager &rm,
96                                     const NGHolder &g, NFAVertex v) {
97     s32 minAdj = 0, maxAdj = 0;
98     const auto &reports = g[v].reports;
99     for (auto ri = reports.begin(), re = reports.end(); ri != re; ++ri) {
100         const Report &ir = rm.getReport(*ri);
101         if (ri == reports.begin()) {
102             minAdj = ir.offsetAdjust;
103             maxAdj = ir.offsetAdjust;
104         } else {
105             minAdj = min(minAdj, ir.offsetAdjust);
106             maxAdj = max(maxAdj, ir.offsetAdjust);
107         }
108     }
109 
110     return make_pair(minAdj, maxAdj);
111 }
112 
113 /** \brief Find the (min, max) length of any match for the given holder. */
114 static
findMatchLengths(const ReportManager & rm,const NGHolder & g)115 DepthMinMax findMatchLengths(const ReportManager &rm, const NGHolder &g) {
116     DepthMinMax match_depths;
117 
118     vector<DepthMinMax> depths = getDistancesFromSOM(g);
119 
120     pair<s32, s32> adj;
121 
122     for (auto v : inv_adjacent_vertices_range(g.accept, g)) {
123         u32 idx = g[v].index;
124         DepthMinMax d = depths[idx]; // copy
125         adj = getMinMaxOffsetAdjust(rm, g, v);
126         DEBUG_PRINTF("vertex %u: depths=%s, adj=[%d,%d]\n", idx,
127                      d.str().c_str(), adj.first, adj.second);
128         d.min += adj.first;
129         d.max += adj.second;
130         match_depths = unionDepthMinMax(match_depths, d);
131     }
132 
133     for (auto v : inv_adjacent_vertices_range(g.acceptEod, g)) {
134         if (v == g.accept) {
135             continue;
136         }
137         u32 idx = g[v].index;
138         DepthMinMax d = depths[idx]; // copy
139         adj = getMinMaxOffsetAdjust(rm, g, v);
140         DEBUG_PRINTF("vertex %u: depths=%s, adj=[%d,%d]\n", idx,
141                      d.str().c_str(), adj.first, adj.second);
142         d.min += adj.first;
143         d.max += adj.second;
144         match_depths = unionDepthMinMax(match_depths, d);
145     }
146 
147     DEBUG_PRINTF("match_depths=%s\n", match_depths.str().c_str());
148 
149     assert(match_depths.min.is_reachable());
150     assert(match_depths.max.is_reachable());
151     return match_depths;
152 }
153 
154 template<typename Function>
replaceReports(NGHolder & g,NFAVertex accept,flat_set<NFAVertex> & seen,Function func)155 void replaceReports(NGHolder &g, NFAVertex accept, flat_set<NFAVertex> &seen,
156                     Function func) {
157     for (auto v : inv_adjacent_vertices_range(accept, g)) {
158         if (v == g.accept) {
159             // Don't operate on accept: the accept->acceptEod edge is stylised.
160             assert(accept == g.acceptEod);
161             assert(g[v].reports.empty());
162             continue;
163         }
164 
165         if (!seen.insert(v).second) {
166             continue; // We have already processed v.
167         }
168 
169         auto &reports = g[v].reports;
170         if (reports.empty()) {
171             continue;
172         }
173         decltype(g[v].reports) new_reports;
174         for (auto id : g[v].reports) {
175             new_reports.insert(func(v, id));
176         }
177         reports = std::move(new_reports);
178     }
179 }
180 
181 /**
182  * Generic function for replacing all the reports in the graph.
183  *
184  * Pass this a function that takes a vertex and a ReportID returns another
185  * ReportID (or the same one) to replace it with.
186  */
187 template<typename Function>
replaceReports(NGHolder & g,Function func)188 void replaceReports(NGHolder &g, Function func) {
189     flat_set<NFAVertex> seen;
190     replaceReports(g, g.accept, seen, func);
191     replaceReports(g, g.acceptEod, seen, func);
192 }
193 
194 /** \brief Replace the graph's reports with new reports that specify bounds. */
195 static
updateReportBounds(ReportManager & rm,NGHolder & g,const ExpressionInfo & expr)196 void updateReportBounds(ReportManager &rm, NGHolder &g,
197                         const ExpressionInfo &expr) {
198     DEBUG_PRINTF("updating report bounds\n");
199     replaceReports(g, [&](NFAVertex, ReportID id) {
200         Report report = rm.getReport(id); // make a copy
201         assert(!report.hasBounds());
202 
203         // Note that we need to cope with offset adjustment here.
204 
205         report.minOffset = expr.min_offset - report.offsetAdjust;
206         if (expr.max_offset == MAX_OFFSET) {
207             report.maxOffset = MAX_OFFSET;
208         } else {
209             report.maxOffset = expr.max_offset - report.offsetAdjust;
210         }
211         assert(report.maxOffset >= report.minOffset);
212 
213         report.minLength = expr.min_length;
214         if (expr.min_length && !expr.som) {
215             report.quashSom = true;
216         }
217 
218         DEBUG_PRINTF("id %u -> min_offset=%llu, max_offset=%llu, "
219                      "min_length=%llu\n", id, report.minOffset,
220                      report.maxOffset, report.minLength);
221 
222         return rm.getInternalId(report);
223     });
224 }
225 
226 static
hasVirtualStarts(const NGHolder & g)227 bool hasVirtualStarts(const NGHolder &g) {
228     for (auto v : adjacent_vertices_range(g.start, g)) {
229         if (g[v].assert_flags & POS_FLAG_VIRTUAL_START) {
230             return true;
231         }
232     }
233     return false;
234 }
235 
236 /** Set the min_length param for all reports to zero.  */
237 static
clearMinLengthParam(NGHolder & g,ReportManager & rm)238 void clearMinLengthParam(NGHolder &g, ReportManager &rm) {
239     DEBUG_PRINTF("clearing min length\n");
240     replaceReports(g, [&rm](NFAVertex, ReportID id) {
241         const auto &report = rm.getReport(id);
242         if (report.minLength) {
243             Report new_report = report;
244             new_report.minLength = 0;
245             return rm.getInternalId(new_report);
246         }
247         return id;
248     });
249 }
250 
251 /**
252  * Set the min_offset param to zero and the max_offset param to MAX_OFFSET for
253  * all reports.
254  */
255 static
clearOffsetParams(NGHolder & g,ReportManager & rm)256 void clearOffsetParams(NGHolder &g, ReportManager &rm) {
257     DEBUG_PRINTF("clearing min and max offset\n");
258     replaceReports(g, [&rm](NFAVertex, ReportID id) {
259         const auto &report = rm.getReport(id);
260         if (report.minLength) {
261             Report new_report = report;
262             new_report.minOffset = 0;
263             new_report.maxOffset = MAX_OFFSET;
264             return rm.getInternalId(new_report);
265         }
266         return id;
267     });
268 }
269 
270 /**
271  * If the pattern is unanchored, has a max_offset and has not asked for SOM, we
272  * can use that knowledge to anchor it which will limit its lifespan. Note that
273  * we can't use this transformation if there's a min_length, as it's currently
274  * handled using "sly SOM".
275  *
276  * Note that it is possible to handle graphs that have a combination of
277  * anchored and unanchored paths, but it's too tricky for the moment.
278  */
279 static
anchorPatternWithBoundedRepeat(NGHolder & g,ReportManager & rm)280 bool anchorPatternWithBoundedRepeat(NGHolder &g, ReportManager &rm) {
281     if (!isFloating(g)) {
282         return false;
283     }
284 
285     const auto &reports = all_reports(g);
286     if (reports.empty()) {
287         return false;
288     }
289 
290     if (any_of_in(reports, [&](ReportID id) {
291             const auto &report = rm.getReport(id);
292             return report.maxOffset == MAX_OFFSET || report.minLength ||
293                    report.offsetAdjust;
294         })) {
295         return false;
296     }
297 
298     if (!hasSameBounds(reports, rm)) {
299         DEBUG_PRINTF("mixed report bounds\n");
300         return false;
301     }
302 
303     const depth minWidth = findMinWidth(g);
304     const depth maxWidth = findMaxWidth(g);
305 
306     assert(minWidth <= maxWidth);
307     assert(maxWidth.is_reachable());
308 
309     const auto &first_report = rm.getReport(*reports.begin());
310     const auto min_offset = first_report.minOffset;
311     const auto max_offset = first_report.maxOffset;
312     assert(max_offset < MAX_OFFSET);
313 
314     DEBUG_PRINTF("widths=[%s,%s], min/max offsets=[%llu,%llu]\n",
315                  minWidth.str().c_str(), maxWidth.str().c_str(),
316                  min_offset, max_offset);
317 
318     if (max_offset > MAX_MAXOFFSET_TO_ANCHOR) {
319         return false;
320     }
321 
322     if (max_offset < minWidth) {
323         assert(0);
324         return false;
325     }
326 
327     // If the pattern has virtual starts, we probably don't want to touch it.
328     if (hasVirtualStarts(g)) {
329         DEBUG_PRINTF("virtual starts, bailing\n");
330         return false;
331     }
332 
333     // Similarly, bail if the pattern is vacuous. TODO: this could be done, we
334     // would just need to be a little careful with reports.
335     if (isVacuous(g)) {
336         DEBUG_PRINTF("vacuous, bailing\n");
337         return false;
338     }
339 
340     u32 min_bound, max_bound;
341     if (maxWidth.is_infinite()) {
342         min_bound = 0;
343         max_bound = max_offset - minWidth;
344     } else {
345         min_bound = min_offset > maxWidth ? min_offset - maxWidth : 0;
346         max_bound = max_offset - minWidth;
347     }
348 
349     DEBUG_PRINTF("prepending ^.{%u,%u}\n", min_bound, max_bound);
350 
351     vector<NFAVertex> initials;
352     for (auto v : adjacent_vertices_range(g.startDs, g)) {
353         if (v == g.startDs) {
354             continue;
355         }
356         initials.push_back(v);
357     }
358     if (initials.empty()) {
359         DEBUG_PRINTF("no initial vertices\n");
360         return false;
361     }
362 
363     // Wire up 'min_offset' mandatory dots from anchored start.
364     NFAVertex u = g.start;
365     for (u32 i = 0; i < min_bound; i++) {
366         NFAVertex v = add_vertex(g);
367         g[v].char_reach.setall();
368         add_edge(u, v, g);
369         u = v;
370     }
371 
372     NFAVertex head = u;
373 
374     // Wire up optional dots for (max_offset - min_offset).
375     for (u32 i = 0; i < max_bound - min_bound; i++) {
376         NFAVertex v = add_vertex(g);
377         g[v].char_reach.setall();
378         if (head != u) {
379             add_edge(head, v, g);
380         }
381         add_edge(u, v, g);
382         u = v;
383     }
384 
385     // Remove edges from starts and wire both head and u to our initials.
386     for (auto v : initials) {
387         remove_edge(g.startDs, v, g);
388         remove_edge(g.start, v, g);
389 
390         if (head != u) {
391             add_edge(head, v, g);
392         }
393         add_edge(u, v, g);
394     }
395 
396     renumber_vertices(g);
397     renumber_edges(g);
398 
399     if (minWidth == maxWidth) {
400         // For a fixed width pattern, we can retire the offsets as
401         // they are implicit in the graph now.
402         clearOffsetParams(g, rm);
403     }
404 
405     clearReports(g);
406     return true;
407 }
408 
409 static
findSingleCyclic(const NGHolder & g)410 NFAVertex findSingleCyclic(const NGHolder &g) {
411     NFAVertex v = NGHolder::null_vertex();
412     for (const auto &e : edges_range(g)) {
413         if (source(e, g) == target(e, g)) {
414             if (source(e, g) == g.startDs) {
415                 continue;
416             }
417             if (v != NGHolder::null_vertex()) {
418                 // More than one cyclic vertex.
419                 return NGHolder::null_vertex();
420             }
421             v = source(e, g);
422         }
423     }
424 
425     if (v != NGHolder::null_vertex()) {
426         DEBUG_PRINTF("cyclic is %zu\n", g[v].index);
427         assert(!is_special(v, g));
428     }
429     return v;
430 }
431 
432 static
hasOffsetAdjust(const ReportManager & rm,NGHolder & g,int * adjust)433 bool hasOffsetAdjust(const ReportManager &rm, NGHolder &g,
434                      int *adjust) {
435     const auto &reports = all_reports(g);
436     if (reports.empty()) {
437         assert(0);
438         return false;
439     }
440 
441     int offsetAdjust = rm.getReport(*reports.begin()).offsetAdjust;
442     for (auto report : reports) {
443         const Report &ir = rm.getReport(report);
444         if (ir.offsetAdjust != offsetAdjust) {
445             DEBUG_PRINTF("different adjusts!\n");
446             return false;
447         }
448     }
449 
450     *adjust = offsetAdjust;
451     return true;
452 }
453 
454 /**
455  * If the pattern has a min_length and is of "ratchet" form with one unbounded
456  * repeat, that repeat can become a bounded repeat.
457  *
458  *     /foo.*bar/{min_length=100} --> /foo.{94,}bar/
459  */
460 static
transformMinLengthToRepeat(NGHolder & g,ReportManager & rm)461 bool transformMinLengthToRepeat(NGHolder &g, ReportManager &rm) {
462     const auto &reports = all_reports(g);
463 
464     if (reports.empty()) {
465         return false;
466     }
467 
468     if (!hasSameBounds(reports, rm)) {
469         DEBUG_PRINTF("mixed report bounds\n");
470         return false;
471     }
472 
473     const auto &min_length = rm.getReport(*reports.begin()).minLength;
474     if (!min_length || min_length > MAX_MINLENGTH_TO_CONVERT) {
475         return false;
476     }
477 
478     // If the pattern has virtual starts, we probably don't want to touch it.
479     if (hasVirtualStarts(g)) {
480         DEBUG_PRINTF("virtual starts, bailing\n");
481         return false;
482     }
483 
484     // The graph must contain a single cyclic vertex (other than startDs), and
485     // that vertex can have one pred and one successor.
486     NFAVertex cyclic = findSingleCyclic(g);
487     if (cyclic == NGHolder::null_vertex()) {
488         return false;
489     }
490 
491     NGHolder::adjacency_iterator ai, ae;
492     tie(ai, ae) = adjacent_vertices(g.start, g);
493     if (*ai == g.startDs) {
494         ++ai;
495     }
496     NFAVertex v = *ai;
497     if (++ai != ae) {
498         DEBUG_PRINTF("more than one initial vertex\n");
499         return false;
500     }
501 
502     u32 width = 0;
503 
504     // Walk from the start vertex to the cyclic state and ensure we have a
505     // chain of vertices.
506     while (v != cyclic) {
507         DEBUG_PRINTF("vertex %zu\n", g[v].index);
508         width++;
509         auto succ = succs(v, g);
510         if (contains(succ, cyclic)) {
511             if (succ.size() == 1) {
512                 v = cyclic;
513             } else if (succ.size() == 2) {
514                 // Cyclic and jump edge.
515                 succ.erase(cyclic);
516                 NFAVertex v2 = *succ.begin();
517                 if (!edge(cyclic, v2, g).second) {
518                     DEBUG_PRINTF("bad form\n");
519                     return false;
520                 }
521                 v = cyclic;
522             } else {
523                 DEBUG_PRINTF("bad form\n");
524                 return false;
525             }
526         } else {
527             if (succ.size() != 1) {
528                 DEBUG_PRINTF("bad form\n");
529                 return false;
530             }
531             v = *succ.begin();
532         }
533     }
534 
535     // Check the cyclic state is A-OK.
536     v = getSoleDestVertex(g, cyclic);
537     if (v == NGHolder::null_vertex()) {
538         DEBUG_PRINTF("cyclic has more than one successor\n");
539         return false;
540     }
541 
542     // Walk from the cyclic state to an accept and ensure we have a chain of
543     // vertices.
544     while (!is_any_accept(v, g)) {
545         DEBUG_PRINTF("vertex %zu\n", g[v].index);
546         width++;
547         auto succ = succs(v, g);
548         if (succ.size() != 1) {
549             DEBUG_PRINTF("bad form\n");
550             return false;
551         }
552         v = *succ.begin();
553     }
554 
555     int offsetAdjust = 0;
556     if (!hasOffsetAdjust(rm, g, &offsetAdjust)) {
557         return false;
558     }
559     DEBUG_PRINTF("adjusting width by %d\n", offsetAdjust);
560     width += offsetAdjust;
561 
562     DEBUG_PRINTF("width=%u, vertex %zu is cyclic\n", width,
563                   g[cyclic].index);
564 
565     if (width >= min_length) {
566         DEBUG_PRINTF("min_length=%llu is guaranteed, as width=%u\n",
567                       min_length, width);
568         clearMinLengthParam(g, rm);
569         return true;
570     }
571 
572     vector<NFAVertex> preds;
573     vector<NFAEdge> dead;
574     for (auto u : inv_adjacent_vertices_range(cyclic, g)) {
575         DEBUG_PRINTF("pred %zu\n", g[u].index);
576         if (u == cyclic) {
577             continue;
578         }
579         preds.push_back(u);
580 
581         // We want to delete the out-edges of each predecessor, but need to
582         // make sure we don't delete the startDs self loop.
583         for (const auto &e : out_edges_range(u, g)) {
584             if (target(e, g) != g.startDs) {
585                 dead.push_back(e);
586             }
587         }
588     }
589 
590     remove_edges(dead, g);
591 
592     assert(!preds.empty());
593 
594     const CharReach &cr = g[cyclic].char_reach;
595 
596     for (u32 i = 0; i < min_length - width - 1; ++i) {
597         v = add_vertex(g);
598         g[v].char_reach = cr;
599 
600         for (auto u : preds) {
601             add_edge(u, v, g);
602         }
603         preds.clear();
604         preds.push_back(v);
605     }
606     assert(!preds.empty());
607     for (auto u : preds) {
608         add_edge(u, cyclic, g);
609     }
610 
611     renumber_vertices(g);
612     renumber_edges(g);
613     clearMinLengthParam(g, rm);
614     clearReports(g);
615     return true;
616 }
617 
618 static
hasExtParams(const ExpressionInfo & expr)619 bool hasExtParams(const ExpressionInfo &expr) {
620     if (expr.min_length != 0) {
621         return true;
622     }
623     if (expr.min_offset != 0) {
624         return true;
625     }
626     if (expr.max_offset != MAX_OFFSET) {
627         return true;
628     }
629     return false;
630 }
631 
632 static
maxDistToAccept(const NFAVertexBidiDepth & d)633 const depth& maxDistToAccept(const NFAVertexBidiDepth &d) {
634     if (d.toAccept.max.is_unreachable()) {
635         return d.toAcceptEod.max;
636     } else if (d.toAcceptEod.max.is_unreachable()) {
637         return d.toAccept.max;
638     }
639     return max(d.toAccept.max, d.toAcceptEod.max);
640 }
641 
642 static
minDistFromStart(const NFAVertexBidiDepth & d)643 const depth& minDistFromStart(const NFAVertexBidiDepth &d) {
644     return min(d.fromStartDotStar.min, d.fromStart.min);
645 }
646 
647 static
minDistToAccept(const NFAVertexBidiDepth & d)648 const depth& minDistToAccept(const NFAVertexBidiDepth &d) {
649     return min(d.toAccept.min, d.toAcceptEod.min);
650 }
651 
652 static
isEdgePrunable(const NGHolder & g,const Report & report,const vector<NFAVertexBidiDepth> & depths,const NFAEdge & e)653 bool isEdgePrunable(const NGHolder &g, const Report &report,
654                     const vector<NFAVertexBidiDepth> &depths,
655                     const NFAEdge &e) {
656     const NFAVertex u = source(e, g);
657     const NFAVertex v = target(e, g);
658 
659     DEBUG_PRINTF("edge (%zu,%zu)\n", g[u].index, g[v].index);
660 
661     // Leave our special-to-special edges alone.
662     if (is_special(u, g) && is_special(v, g)) {
663         DEBUG_PRINTF("ignoring special-to-special\n");
664         return false;
665     }
666 
667     // We must be careful around start: we don't want to remove (start, v) if
668     // (startDs, v) exists as well, since later code will assume the presence
669     // of both edges, but other cases are OK.
670     if (u == g.start && edge(g.startDs, v, g).second) {
671         DEBUG_PRINTF("ignoring unanchored start edge\n");
672         return false;
673     }
674 
675     u32 u_idx = g[u].index;
676     u32 v_idx = g[v].index;
677     assert(u_idx < depths.size() && v_idx < depths.size());
678 
679     const NFAVertexBidiDepth &du = depths.at(u_idx);
680     const NFAVertexBidiDepth &dv = depths.at(v_idx);
681 
682     if (report.minOffset) {
683         depth max_offset = maxDistFromStartOfData(du) + maxDistToAccept(dv);
684         if (max_offset.is_finite() && max_offset < report.minOffset) {
685             DEBUG_PRINTF("max_offset=%s too small\n", max_offset.str().c_str());
686             return true;
687         }
688     }
689 
690     if (report.maxOffset != MAX_OFFSET) {
691         depth min_offset = minDistFromStart(du) + minDistToAccept(dv);
692         assert(min_offset.is_finite());
693 
694         if (min_offset > report.maxOffset) {
695             DEBUG_PRINTF("min_offset=%s too large\n", min_offset.str().c_str());
696             return true;
697         }
698     }
699 
700     if (report.minLength && is_any_accept(v, g)) {
701         // Simple take on min_length. If we're an edge to accept and our max
702         // dist from start is too small, we can be pruned.
703         const depth &width = maxDistFromInit(du);
704         if (width.is_finite() && width < report.minLength) {
705             DEBUG_PRINTF("max width %s from start too small for min_length\n",
706                          width.str().c_str());
707             return true;
708         }
709     }
710 
711     return false;
712 }
713 
714 static
pruneExtUnreachable(NGHolder & g,const ReportManager & rm)715 void pruneExtUnreachable(NGHolder &g, const ReportManager &rm) {
716     const auto &reports = all_reports(g);
717     if (reports.empty()) {
718         return;
719     }
720 
721     if (!hasSameBounds(reports, rm)) {
722         DEBUG_PRINTF("report bounds vary\n");
723         return;
724     }
725 
726     const auto &report = rm.getReport(*reports.begin());
727 
728     auto depths = calcBidiDepths(g);
729 
730     vector<NFAEdge> dead;
731 
732     for (const auto &e : edges_range(g)) {
733         if (isEdgePrunable(g, report, depths, e)) {
734             DEBUG_PRINTF("pruning\n");
735             dead.push_back(e);
736         }
737     }
738 
739     if (dead.empty()) {
740         return;
741     }
742 
743     remove_edges(dead, g);
744     pruneUseless(g);
745     clearReports(g);
746 }
747 
748 /**
749  * Remove vacuous edges in graphs where the min_offset or min_length
750  * constraints dictate that they can never produce a match.
751  */
752 static
pruneVacuousEdges(NGHolder & g,const ReportManager & rm)753 void pruneVacuousEdges(NGHolder &g, const ReportManager &rm) {
754     vector<NFAEdge> dead;
755 
756     auto has_min_offset = [&](NFAVertex v) {
757         assert(!g[v].reports.empty()); // must be reporter
758         return all_of_in(g[v].reports, [&](ReportID id) {
759             return rm.getReport(id).minOffset > 0;
760         });
761     };
762 
763     auto has_min_length = [&](NFAVertex v) {
764         assert(!g[v].reports.empty()); // must be reporter
765         return all_of_in(g[v].reports, [&](ReportID id) {
766             return rm.getReport(id).minLength > 0;
767         });
768     };
769 
770     for (const auto &e : edges_range(g)) {
771         const NFAVertex u = source(e, g);
772         const NFAVertex v = target(e, g);
773 
774         // Special case: Crudely remove vacuous edges from start in graphs with
775         // a min_offset.
776         if (u == g.start && is_any_accept(v, g) && has_min_offset(u)) {
777             DEBUG_PRINTF("vacuous edge in graph with min_offset!\n");
778             dead.push_back(e);
779             continue;
780         }
781 
782         // If a min_length is set, vacuous edges can be removed.
783         if (is_any_start(u, g) && is_any_accept(v, g) && has_min_length(u)) {
784             DEBUG_PRINTF("vacuous edge in graph with min_length!\n");
785             dead.push_back(e);
786             continue;
787         }
788     }
789 
790     if (dead.empty()) {
791         return;
792     }
793 
794     DEBUG_PRINTF("removing %zu vacuous edges\n", dead.size());
795     remove_edges(dead, g);
796     pruneUseless(g);
797     clearReports(g);
798 }
799 
800 static
pruneUnmatchable(NGHolder & g,const vector<DepthMinMax> & depths,const ReportManager & rm,NFAVertex accept)801 void pruneUnmatchable(NGHolder &g, const vector<DepthMinMax> &depths,
802                       const ReportManager &rm, NFAVertex accept) {
803     vector<NFAEdge> dead;
804 
805     for (const auto &e : in_edges_range(accept, g)) {
806         NFAVertex v = source(e, g);
807         if (v == g.accept) {
808             assert(accept == g.acceptEod); // stylised edge
809             continue;
810         }
811 
812         if (!hasSameBounds(g[v].reports, rm)) {
813             continue;
814         }
815         const auto &report = rm.getReport(*g[v].reports.begin());
816 
817         u32 idx = g[v].index;
818         DepthMinMax d = depths[idx]; // copy
819         pair<s32, s32> adj = getMinMaxOffsetAdjust(rm, g, v);
820         DEBUG_PRINTF("vertex %u: depths=%s, adj=[%d,%d]\n", idx,
821                      d.str().c_str(), adj.first, adj.second);
822         d.min += adj.first;
823         d.max += adj.second;
824 
825         if (d.max.is_finite() && d.max < report.minLength) {
826             DEBUG_PRINTF("prune, max match length %s < min_length=%llu\n",
827                          d.max.str().c_str(), report.minLength);
828             dead.push_back(e);
829             continue;
830         }
831 
832         if (report.maxOffset != MAX_OFFSET && d.min > report.maxOffset) {
833             DEBUG_PRINTF("prune, min match length %s > max_offset=%llu\n",
834                          d.min.str().c_str(), report.maxOffset);
835             dead.push_back(e);
836             continue;
837         }
838     }
839 
840     remove_edges(dead, g);
841 }
842 
843 /**
844  * Remove edges to accepts that can never produce a match long enough to
845  * satisfy our min_length and max_offset constraints.
846  */
847 static
pruneUnmatchable(NGHolder & g,const ReportManager & rm)848 void pruneUnmatchable(NGHolder &g, const ReportManager &rm) {
849     if (!any_of_in(all_reports(g), [&](ReportID id) {
850             return rm.getReport(id).minLength > 0;
851         })) {
852         return;
853     }
854 
855     vector<DepthMinMax> depths = getDistancesFromSOM(g);
856 
857     pruneUnmatchable(g, depths, rm, g.accept);
858     pruneUnmatchable(g, depths, rm, g.acceptEod);
859 
860     pruneUseless(g);
861     clearReports(g);
862 }
863 
864 static
hasOffsetAdjustments(const ReportManager & rm,const NGHolder & g)865 bool hasOffsetAdjustments(const ReportManager &rm, const NGHolder &g) {
866     return any_of_in(all_reports(g), [&rm](ReportID id) {
867         return rm.getReport(id).offsetAdjust != 0;
868     });
869 }
870 
propagateExtendedParams(NGHolder & g,ExpressionInfo & expr,ReportManager & rm)871 void propagateExtendedParams(NGHolder &g, ExpressionInfo &expr,
872                              ReportManager &rm) {
873     if (!hasExtParams(expr)) {
874         return;
875     }
876 
877     depth minWidth = findMinWidth(g);
878     depth maxWidth = findMaxWidth(g);
879     bool is_anchored = !has_proper_successor(g.startDs, g)
880                      && out_degree(g.start, g);
881 
882     DepthMinMax match_depths = findMatchLengths(rm, g);
883     DEBUG_PRINTF("match depths %s\n", match_depths.str().c_str());
884 
885     if (is_anchored && maxWidth.is_finite() && expr.min_offset > maxWidth) {
886         ostringstream oss;
887         oss << "Expression is anchored and cannot satisfy min_offset="
888             << expr.min_offset << " as it can only produce matches of length "
889             << maxWidth << " bytes at most.";
890         throw CompileError(expr.index, oss.str());
891     }
892 
893     if (minWidth > expr.max_offset) {
894         ostringstream oss;
895         oss << "Expression has max_offset=" << expr.max_offset
896             << " but requires " << minWidth << " bytes to match.";
897         throw CompileError(expr.index, oss.str());
898     }
899 
900     if (maxWidth.is_finite() && match_depths.max < expr.min_length) {
901         ostringstream oss;
902         oss << "Expression has min_length=" << expr.min_length << " but can "
903             "only produce matches of length " << match_depths.max <<
904             " bytes at most.";
905         throw CompileError(expr.index, oss.str());
906     }
907 
908     if (expr.min_length && expr.min_length <= match_depths.min) {
909         DEBUG_PRINTF("min_length=%llu constraint is unnecessary\n",
910                      expr.min_length);
911         expr.min_length = 0;
912     }
913 
914     if (!hasExtParams(expr)) {
915         return;
916     }
917 
918     updateReportBounds(rm, g, expr);
919 }
920 
921 /**
922  * If the pattern is completely anchored and has a min_length set, this can
923  * be converted to a min_offset.
924  */
925 static
replaceMinLengthWithOffset(NGHolder & g,ReportManager & rm)926 void replaceMinLengthWithOffset(NGHolder &g, ReportManager &rm) {
927     if (has_proper_successor(g.startDs, g)) {
928         return; // not wholly anchored
929     }
930 
931     replaceReports(g, [&rm](NFAVertex, ReportID id) {
932         const auto &report = rm.getReport(id);
933         if (report.minLength) {
934             Report new_report = report;
935             u64a min_len_offset = report.minLength - report.offsetAdjust;
936             new_report.minOffset = max(report.minOffset, min_len_offset);
937             new_report.minLength = 0;
938             return rm.getInternalId(new_report);
939         }
940         return id;
941     });
942 }
943 
944 /**
945  * Clear offset bounds on reports that are not needed because they're satisfied
946  * by vertex depth.
947  */
948 static
removeUnneededOffsetBounds(NGHolder & g,ReportManager & rm)949 void removeUnneededOffsetBounds(NGHolder &g, ReportManager &rm) {
950     auto depths = calcDepths(g);
951 
952     replaceReports(g, [&](NFAVertex v, ReportID id) {
953         const auto &d = depths.at(g[v].index);
954         const depth &min_depth = min(d.fromStartDotStar.min, d.fromStart.min);
955         const depth &max_depth = maxDistFromStartOfData(d);
956 
957         DEBUG_PRINTF("vertex %zu has min_depth=%s, max_depth=%s\n", g[v].index,
958                      min_depth.str().c_str(), max_depth.str().c_str());
959 
960         Report report = rm.getReport(id); // copy
961         bool modified = false;
962         if (report.minOffset && !report.offsetAdjust &&
963             report.minOffset <= min_depth) {
964             report.minOffset = 0;
965             modified = true;
966         }
967         if (report.maxOffset != MAX_OFFSET && max_depth.is_finite() &&
968             report.maxOffset >= max_depth) {
969             report.maxOffset = MAX_OFFSET;
970             modified = true;
971         }
972         if (modified) {
973             DEBUG_PRINTF("vertex %zu, changed bounds to [%llu,%llu]\n",
974                          g[v].index, report.minOffset, report.maxOffset);
975             return rm.getInternalId(report);
976         }
977 
978         return id;
979     });
980 }
981 
reduceExtendedParams(NGHolder & g,ReportManager & rm,som_type som)982 void reduceExtendedParams(NGHolder &g, ReportManager &rm, som_type som) {
983     if (!any_of_in(all_reports(g),
984                    [&](ReportID id) { return rm.getReport(id).hasBounds(); })) {
985         DEBUG_PRINTF("no extparam bounds\n");
986         return;
987     }
988 
989     DEBUG_PRINTF("graph has extparam bounds\n");
990 
991     pruneVacuousEdges(g, rm);
992     if (can_never_match(g)) {
993         return;
994     }
995 
996     pruneUnmatchable(g, rm);
997     if (can_never_match(g)) {
998         return;
999     }
1000 
1001     if (!hasOffsetAdjustments(rm, g)) {
1002         pruneExtUnreachable(g, rm);
1003         if (can_never_match(g)) {
1004             return;
1005         }
1006     }
1007 
1008     replaceMinLengthWithOffset(g, rm);
1009     if (can_never_match(g)) {
1010         return;
1011     }
1012 
1013     // If the pattern has a min_length and is of "ratchet" form with one
1014     // unbounded repeat, that repeat can become a bounded repeat.
1015     // e.g. /foo.*bar/{min_length=100} --> /foo.{94,}bar/
1016     transformMinLengthToRepeat(g, rm);
1017     if (can_never_match(g)) {
1018         return;
1019     }
1020 
1021     // If the pattern is unanchored, has a max_offset and has not asked for
1022     // SOM, we can use that knowledge to anchor it which will limit its
1023     // lifespan. Note that we can't use this transformation if there's a
1024     // min_length, as it's currently handled using "sly SOM".
1025     if (som == SOM_NONE) {
1026         anchorPatternWithBoundedRepeat(g, rm);
1027         if (can_never_match(g)) {
1028             return;
1029         }
1030     }
1031 
1032     removeUnneededOffsetBounds(g, rm);
1033 }
1034 
1035 } // namespace ue2
1036