1 /* 2 * Copyright (c) 2015, Intel Corporation 3 * 4 * Redistribution and use in source and binary forms, with or without 5 * modification, are permitted provided that the following conditions are met: 6 * 7 * * Redistributions of source code must retain the above copyright notice, 8 * this list of conditions and the following disclaimer. 9 * * Redistributions in binary form must reproduce the above copyright 10 * notice, this list of conditions and the following disclaimer in the 11 * documentation and/or other materials provided with the distribution. 12 * * Neither the name of Intel Corporation nor the names of its contributors 13 * may be used to endorse or promote products derived from this software 14 * without specific prior written permission. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 17 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 20 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 21 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 22 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 23 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 24 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 25 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 26 * POSSIBILITY OF SUCH DAMAGE. 27 */ 28 29 /** \file 30 * \brief Base class for all components. 31 */ 32 33 #ifndef _RE_COMPONENT_H_ 34 #define _RE_COMPONENT_H_ 35 36 #include "ComponentVisitor.h" 37 #include "ConstComponentVisitor.h" 38 39 #include "position.h" 40 #include "ue2common.h" 41 42 #include <set> 43 #include <string> 44 #include <vector> 45 46 namespace ue2 { 47 48 class GlushkovBuildState; 49 class PositionInfo; 50 51 enum EmptyPathType { 52 NOT_EMPTY, /**< component must consume characters */ 53 EPS_ONLY_PATHS, /**< eps path with no overhanging asserts */ 54 BOUNDARY_PATHS /**< eps paths some with overhanging asserts */ 55 }; 56 57 /** \brief Base class for regular expression parse tree components. */ 58 class Component { 59 friend class DumpVisitor; 60 public: 61 /** \brief Constructor. */ 62 Component(); 63 64 /** \brief Destructor. */ 65 virtual ~Component(); 66 67 /** \brief Returns a newly-allocated deep copy of this component. */ 68 virtual Component *clone() const = 0; 69 70 /** \brief Apply the given visitor functor. */ 71 virtual Component *accept(ComponentVisitor &v) = 0; 72 73 /** \brief Apply the given const visitor functor. */ 74 virtual void accept(ConstComponentVisitor &v) const = 0; 75 76 /** \brief Glushkov construction First() function. 77 * \return set of initial positions in this component. */ 78 virtual std::vector<PositionInfo> first() const = 0; 79 80 /** \brief Glushkov construction Last() function. 81 * \return set of final positions in this component. */ 82 virtual std::vector<PositionInfo> last() const = 0; 83 84 /** \brief Glushkov construction Empty() function. 85 * \return true iff the component accepts epsilon. 86 * 87 * Note: ^, $, etc are considered empty. */ 88 virtual bool empty() const = 0; 89 90 /** \brief True iff epsilon can pass through the component. 91 * 92 * Note: ^, $, etc are not vacuous everywhere. */ 93 virtual bool vacuous_everywhere(void) const; 94 95 /** \brief True iff the component is repeatable on its own, without being 96 * encapsulated in a sequence first. 97 * 98 * This is true for most components, but not for repeats, anchors and word 99 * boundaries. */ 100 virtual bool repeatable() const; 101 102 /** \brief Optimisation pass on the component tree. 103 * 104 * Called before \ref notePositions. May modify to the component tree. 105 * Assumes no start of match information is required. 106 */ 107 virtual void optimise(bool connected_to_sds); 108 109 /** \brief Informs the Glushkov build process of the positions used by this 110 * component. */ 111 virtual void notePositions(GlushkovBuildState &bs) = 0; 112 113 /** \brief Glushkov construction Follow() function. 114 * 115 * Constructs (in \a bs) the set of positions in this component reachable 116 * from the positions in \a lastPos. 117 * 118 * \throw ParseError on failure 119 */ 120 virtual void buildFollowSet(GlushkovBuildState &bs, 121 const std::vector<PositionInfo> &lastPos) = 0; 122 123 /** \brief Return value is used for chaining, throws if finds embedded 124 * anchor. */ 125 virtual bool checkEmbeddedStartAnchor(bool at_start) const; 126 127 /* \brief Return value is used for chaining, throws if finds embedded 128 * anchor. */ 129 virtual bool checkEmbeddedEndAnchor(bool at_end) const; 130 131 protected: 132 /** \brief Called during \ref notePositions. */ 133 void recordPosBounds(u32 b, u32 e); 134 135 u32 pos_begin; 136 u32 pos_end; 137 138 // Protected copy ctor. Use clone instead. Component(const Component & other)139 Component(const Component &other) 140 : pos_begin(other.pos_begin), pos_end(other.pos_end) {} 141 }; 142 143 } // namespace ue2 144 145 #endif 146