1 /*
2  * Copyright (c) 2015, Intel Corporation
3  *
4  * Redistribution and use in source and binary forms, with or without
5  * modification, are permitted provided that the following conditions are met:
6  *
7  *  * Redistributions of source code must retain the above copyright notice,
8  *    this list of conditions and the following disclaimer.
9  *  * Redistributions in binary form must reproduce the above copyright
10  *    notice, this list of conditions and the following disclaimer in the
11  *    documentation and/or other materials provided with the distribution.
12  *  * Neither the name of Intel Corporation nor the names of its contributors
13  *    may be used to endorse or promote products derived from this software
14  *    without specific prior written permission.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
20  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26  * POSSIBILITY OF SUCH DAMAGE.
27  */
28 
29 /** \file
30  * \brief Base class for all components.
31  */
32 
33 #ifndef _RE_COMPONENT_H_
34 #define _RE_COMPONENT_H_
35 
36 #include "ComponentVisitor.h"
37 #include "ConstComponentVisitor.h"
38 
39 #include "position.h"
40 #include "ue2common.h"
41 
42 #include <set>
43 #include <string>
44 #include <vector>
45 
46 namespace ue2 {
47 
48 class GlushkovBuildState;
49 class PositionInfo;
50 
51 enum EmptyPathType {
52     NOT_EMPTY,       /**< component must consume characters */
53     EPS_ONLY_PATHS,  /**< eps path with no overhanging asserts */
54     BOUNDARY_PATHS   /**< eps paths some with overhanging asserts */
55 };
56 
57 /** \brief Base class for regular expression parse tree components. */
58 class Component {
59     friend class DumpVisitor;
60 public:
61     /** \brief Constructor. */
62     Component();
63 
64     /** \brief Destructor. */
65     virtual ~Component();
66 
67     /** \brief Returns a newly-allocated deep copy of this component. */
68     virtual Component *clone() const = 0;
69 
70     /** \brief Apply the given visitor functor. */
71     virtual Component *accept(ComponentVisitor &v) = 0;
72 
73     /** \brief Apply the given const visitor functor. */
74     virtual void accept(ConstComponentVisitor &v) const = 0;
75 
76     /** \brief Glushkov construction First() function.
77      * \return set of initial positions in this component. */
78     virtual std::vector<PositionInfo> first() const = 0;
79 
80     /** \brief Glushkov construction Last() function.
81      * \return set of final positions in this component. */
82     virtual std::vector<PositionInfo> last() const = 0;
83 
84     /** \brief Glushkov construction Empty() function.
85      * \return true iff the component accepts epsilon.
86      *
87      * Note: ^, $, etc are considered empty. */
88     virtual bool empty() const = 0;
89 
90     /** \brief True iff epsilon can pass through the component.
91      *
92      * Note: ^, $, etc are not vacuous everywhere. */
93     virtual bool vacuous_everywhere(void) const;
94 
95     /** \brief True iff the component is repeatable on its own, without being
96      * encapsulated in a sequence first.
97      *
98      * This is true for most components, but not for repeats, anchors and word
99      * boundaries. */
100     virtual bool repeatable() const;
101 
102     /** \brief Optimisation pass on the component tree.
103      *
104      * Called before \ref notePositions. May modify to the component tree.
105      * Assumes no start of match information is required.
106      */
107     virtual void optimise(bool connected_to_sds);
108 
109     /** \brief Informs the Glushkov build process of the positions used by this
110      * component. */
111     virtual void notePositions(GlushkovBuildState &bs) = 0;
112 
113     /** \brief Glushkov construction Follow() function.
114      *
115      * Constructs (in \a bs) the set of positions in this component reachable
116      * from the positions in \a lastPos.
117      *
118      * \throw ParseError on failure
119      */
120     virtual void buildFollowSet(GlushkovBuildState &bs,
121                                 const std::vector<PositionInfo> &lastPos) = 0;
122 
123     /** \brief Return value is used for chaining, throws if finds embedded
124      * anchor. */
125     virtual bool checkEmbeddedStartAnchor(bool at_start) const;
126 
127     /* \brief Return value is used for chaining, throws if finds embedded
128      * anchor. */
129     virtual bool checkEmbeddedEndAnchor(bool at_end) const;
130 
131 protected:
132     /** \brief Called during \ref notePositions. */
133     void recordPosBounds(u32 b, u32 e);
134 
135     u32 pos_begin;
136     u32 pos_end;
137 
138     // Protected copy ctor. Use clone instead.
Component(const Component & other)139     Component(const Component &other)
140         : pos_begin(other.pos_begin), pos_end(other.pos_end) {}
141 };
142 
143 } // namespace ue2
144 
145 #endif
146