1 /*
2  * Copyright (c) 2015-2017, Intel Corporation
3  *
4  * Redistribution and use in source and binary forms, with or without
5  * modification, are permitted provided that the following conditions are met:
6  *
7  *  * Redistributions of source code must retain the above copyright notice,
8  *    this list of conditions and the following disclaimer.
9  *  * Redistributions in binary form must reproduce the above copyright
10  *    notice, this list of conditions and the following disclaimer in the
11  *    documentation and/or other materials provided with the distribution.
12  *  * Neither the name of Intel Corporation nor the names of its contributors
13  *    may be used to endorse or promote products derived from this software
14  *    without specific prior written permission.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
20  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26  * POSSIBILITY OF SUCH DAMAGE.
27  */
28 
29 /** \file
30  * \brief Bounded repeat analysis.
31  */
32 
33 #ifndef NG_REPEAT_H
34 #define NG_REPEAT_H
35 
36 #include "ng_holder.h"
37 #include "ue2common.h"
38 #include "nfa/repeat_internal.h"
39 #include "util/depth.h"
40 #include "util/flat_containers.h"
41 
42 #include <map>
43 #include <vector>
44 
45 namespace ue2 {
46 
47 class NGHolder;
48 class ReportManager;
49 struct Grey;
50 
51 /**
52  * \brief Everything you need to know about a bounded repeat that we have
53  * transformed.
54  */
55 struct BoundedRepeatData {
BoundedRepeatDataBoundedRepeatData56     BoundedRepeatData(enum RepeatType type_in, const depth &a, const depth &z,
57                       u32 minPeriod_in, NFAVertex cyc, NFAVertex pos,
58                       const std::vector<NFAVertex> &tug_in)
59         : type(type_in), repeatMin(a), repeatMax(z), minPeriod(minPeriod_in),
60           cyclic(cyc), pos_trigger(pos), tug_triggers(tug_in) {}
61 
62     BoundedRepeatData() = delete; // no default construction allowed.
63 
64     enum RepeatType type;  //!< selected type based on bounds and structure
65     depth repeatMin;       //!< minimum repeat bound
66     depth repeatMax;       //!< maximum repeat bound
67     u32 minPeriod;         //!< min trigger period
68     NFAVertex cyclic;      //!< cyclic vertex representing repeat in graph
69     NFAVertex pos_trigger; //!< positive trigger vertex
70     std::vector<NFAVertex> tug_triggers; //!< list of tug trigger vertices
71 };
72 
73 /**
74  * \brief Run the bounded repeat analysis and transform the graph where
75  * bounded repeats are found.
76  *
77  * \param h
78  *      Graph to operate on.
79  * \param rm
80  *      ReportManager, or nullptr if the graph's reports are internal (e.g. for
81  *      Rose use).
82  * \param fixed_depth_tops
83  *      Map of top to possible trigger depth.
84  * \param triggers
85  *      Map of top to the vector of triggers (i.e. preceding literals/masks)
86  * \param repeats
87  *      Repeat info is filled in for caller here.
88  * \param streaming
89  *      True if we're in streaming mode.
90  * \param simple_model_selection
91  *      Don't perform complex (and slow) model selection analysis, e.g.
92  *      determining whether the repeat is sole entry.
93  * \param grey
94  *      Grey box object.
95  * \param reformed_start_ds
96  *      If supplied, this will be set to true if the graph was optimised for a
97  *      leading first repeat, resulting in the output graph having no self-loop
98  *      on startDs.
99  */
100 void analyseRepeats(NGHolder &h, const ReportManager *rm,
101             const std::map<u32, u32> &fixed_depth_tops,
102             const std::map<u32, std::vector<std::vector<CharReach>>> &triggers,
103             std::vector<BoundedRepeatData> *repeats, bool streaming,
104             bool simple_model_selection, const Grey &grey,
105             bool *reformed_start_ds = nullptr);
106 
107 /**
108  * \brief Information on repeats in a holder, returned from \ref findRepeats.
109  */
110 struct GraphRepeatInfo {
111     depth repeatMin; /**< minimum bound */
112     depth repeatMax; /**< effective max bound */
113     std::vector<NFAVertex> vertices; /**< vertices involved in repeat */
114 };
115 
116 /**
117  * \brief Provides information on repeats in the graph.
118  */
119 void findRepeats(const NGHolder &h, u32 minRepeatVertices,
120                  std::vector<GraphRepeatInfo> *repeats_out);
121 
122 struct PureRepeat {
123     CharReach reach;
124     DepthMinMax bounds;
125     flat_set<ReportID> reports;
126 
127     bool operator==(const PureRepeat &a) const {
128         return reach == a.reach && bounds == a.bounds && reports == a.reports;
129     }
130 
131     bool operator!=(const PureRepeat &a) const { return !(*this == a); }
132 
133     bool operator<(const PureRepeat &a) const {
134         if (reach != a.reach) {
135             return reach < a.reach;
136         }
137         if (bounds != a.bounds) {
138             return bounds < a.bounds;
139         }
140         return reports < a.reports;
141     }
142 };
143 
144 /**
145  * \brief Returns true and fills the given PureRepeat structure if the graph is
146  * wholly a repeat over a single character class.
147  *
148  * For example, something like:
149  *
150  *     /^[a-z]{10,20}/
151  *
152  * - Note: graph must not use SDS or EOD.
153  * - Note: \p PureRepeat::bounds::max is set to infinity if there is no upper
154  *   bound on the repeat.
155  */
156 bool isPureRepeat(const NGHolder &h, PureRepeat &r);
157 
158 } // namespace ue2
159 
160 #endif // NG_REPEAT_H
161