1 /* 2 * Copyright (c) 2015-2017, Intel Corporation 3 * 4 * Redistribution and use in source and binary forms, with or without 5 * modification, are permitted provided that the following conditions are met: 6 * 7 * * Redistributions of source code must retain the above copyright notice, 8 * this list of conditions and the following disclaimer. 9 * * Redistributions in binary form must reproduce the above copyright 10 * notice, this list of conditions and the following disclaimer in the 11 * documentation and/or other materials provided with the distribution. 12 * * Neither the name of Intel Corporation nor the names of its contributors 13 * may be used to endorse or promote products derived from this software 14 * without specific prior written permission. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 17 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 20 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 21 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 22 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 23 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 24 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 25 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 26 * POSSIBILITY OF SUCH DAMAGE. 27 */ 28 29 /** \file 30 * \brief Bounded repeat analysis. 31 */ 32 33 #ifndef NG_REPEAT_H 34 #define NG_REPEAT_H 35 36 #include "ng_holder.h" 37 #include "ue2common.h" 38 #include "nfa/repeat_internal.h" 39 #include "util/depth.h" 40 #include "util/flat_containers.h" 41 42 #include <map> 43 #include <vector> 44 45 namespace ue2 { 46 47 class NGHolder; 48 class ReportManager; 49 struct Grey; 50 51 /** 52 * \brief Everything you need to know about a bounded repeat that we have 53 * transformed. 54 */ 55 struct BoundedRepeatData { BoundedRepeatDataBoundedRepeatData56 BoundedRepeatData(enum RepeatType type_in, const depth &a, const depth &z, 57 u32 minPeriod_in, NFAVertex cyc, NFAVertex pos, 58 const std::vector<NFAVertex> &tug_in) 59 : type(type_in), repeatMin(a), repeatMax(z), minPeriod(minPeriod_in), 60 cyclic(cyc), pos_trigger(pos), tug_triggers(tug_in) {} 61 62 BoundedRepeatData() = delete; // no default construction allowed. 63 64 enum RepeatType type; //!< selected type based on bounds and structure 65 depth repeatMin; //!< minimum repeat bound 66 depth repeatMax; //!< maximum repeat bound 67 u32 minPeriod; //!< min trigger period 68 NFAVertex cyclic; //!< cyclic vertex representing repeat in graph 69 NFAVertex pos_trigger; //!< positive trigger vertex 70 std::vector<NFAVertex> tug_triggers; //!< list of tug trigger vertices 71 }; 72 73 /** 74 * \brief Run the bounded repeat analysis and transform the graph where 75 * bounded repeats are found. 76 * 77 * \param h 78 * Graph to operate on. 79 * \param rm 80 * ReportManager, or nullptr if the graph's reports are internal (e.g. for 81 * Rose use). 82 * \param fixed_depth_tops 83 * Map of top to possible trigger depth. 84 * \param triggers 85 * Map of top to the vector of triggers (i.e. preceding literals/masks) 86 * \param repeats 87 * Repeat info is filled in for caller here. 88 * \param streaming 89 * True if we're in streaming mode. 90 * \param simple_model_selection 91 * Don't perform complex (and slow) model selection analysis, e.g. 92 * determining whether the repeat is sole entry. 93 * \param grey 94 * Grey box object. 95 * \param reformed_start_ds 96 * If supplied, this will be set to true if the graph was optimised for a 97 * leading first repeat, resulting in the output graph having no self-loop 98 * on startDs. 99 */ 100 void analyseRepeats(NGHolder &h, const ReportManager *rm, 101 const std::map<u32, u32> &fixed_depth_tops, 102 const std::map<u32, std::vector<std::vector<CharReach>>> &triggers, 103 std::vector<BoundedRepeatData> *repeats, bool streaming, 104 bool simple_model_selection, const Grey &grey, 105 bool *reformed_start_ds = nullptr); 106 107 /** 108 * \brief Information on repeats in a holder, returned from \ref findRepeats. 109 */ 110 struct GraphRepeatInfo { 111 depth repeatMin; /**< minimum bound */ 112 depth repeatMax; /**< effective max bound */ 113 std::vector<NFAVertex> vertices; /**< vertices involved in repeat */ 114 }; 115 116 /** 117 * \brief Provides information on repeats in the graph. 118 */ 119 void findRepeats(const NGHolder &h, u32 minRepeatVertices, 120 std::vector<GraphRepeatInfo> *repeats_out); 121 122 struct PureRepeat { 123 CharReach reach; 124 DepthMinMax bounds; 125 flat_set<ReportID> reports; 126 127 bool operator==(const PureRepeat &a) const { 128 return reach == a.reach && bounds == a.bounds && reports == a.reports; 129 } 130 131 bool operator!=(const PureRepeat &a) const { return !(*this == a); } 132 133 bool operator<(const PureRepeat &a) const { 134 if (reach != a.reach) { 135 return reach < a.reach; 136 } 137 if (bounds != a.bounds) { 138 return bounds < a.bounds; 139 } 140 return reports < a.reports; 141 } 142 }; 143 144 /** 145 * \brief Returns true and fills the given PureRepeat structure if the graph is 146 * wholly a repeat over a single character class. 147 * 148 * For example, something like: 149 * 150 * /^[a-z]{10,20}/ 151 * 152 * - Note: graph must not use SDS or EOD. 153 * - Note: \p PureRepeat::bounds::max is set to infinity if there is no upper 154 * bound on the repeat. 155 */ 156 bool isPureRepeat(const NGHolder &h, PureRepeat &r); 157 158 } // namespace ue2 159 160 #endif // NG_REPEAT_H 161