1 /*
2  * Copyright (c) 2015, Intel Corporation
3  *
4  * Redistribution and use in source and binary forms, with or without
5  * modification, are permitted provided that the following conditions are met:
6  *
7  *  * Redistributions of source code must retain the above copyright notice,
8  *    this list of conditions and the following disclaimer.
9  *  * Redistributions in binary form must reproduce the above copyright
10  *    notice, this list of conditions and the following disclaimer in the
11  *    documentation and/or other materials provided with the distribution.
12  *  * Neither the name of Intel Corporation nor the names of its contributors
13  *    may be used to endorse or promote products derived from this software
14  *    without specific prior written permission.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
20  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26  * POSSIBILITY OF SUCH DAMAGE.
27  */
28 
29 /** \file
30  * \brief Repeats ('*', '+', '?', '{M,N}', etc)
31  */
32 
33 #ifndef RE_COMPONENTREPEAT_H
34 #define RE_COMPONENTREPEAT_H
35 
36 #include "Component.h"
37 #include "position.h"
38 #include "ue2common.h"
39 
40 #include <memory>
41 #include <utility>
42 
43 namespace ue2 {
44 
45 /**
46  * \brief Encapsulates a repeat of a subexpression ('*', '+', '?', '{M,N}',
47  * etc).
48  *
49  * ASCII Art Time:
50  *
51  * Our standard representation of standard repeats. Other constructions (fan-in
52  * vs fan-out) would also be possible and equivalent for our purposes.
53  *
54  * {n,m}
55  *
56  *     S->M->M->M->O->O->O->T
57  *              |     ^  ^  ^
58  *              |     |  |  |
59  *              \-----------/
60  *
61  * {0,m}
62  *
63  *     /-----------\
64  *     |           |
65  *     |           V
66  *     S->O->O->O->T
67  *        |  ^  ^  ^
68  *        |  |  |  |
69  *        \--------/
70  *
71  */
72 class ComponentRepeat : public Component {
73     friend class ConstructLiteralVisitor;
74     friend class DumpVisitor;
75     friend class PrintVisitor;
76     friend class SimplifyVisitor;
77 public:
78     /** \brief Value representing no maximum bound. */
79     static constexpr u32 NoLimit = 0xffffffff;
80 
81     /** \brief Type of this repeat, characterising its
82      * greediness/possessiveness. */
83     enum RepeatType {
84         /** Minimising repeat, like 'a*?'. */
85         REPEAT_NONGREEDY,
86         /** Maximising repeat, like 'a*'. This is the default in PCRE. */
87         REPEAT_GREEDY,
88         /** Possessive, maximising repeat, like 'a*+'. Possessive repeats are
89          * only currently supported in prefiltering mode, where we treat them
90          * the same way we treat normal greedy repeats. */
91         REPEAT_POSSESSIVE,
92     };
93 
94     ComponentRepeat(std::unique_ptr<Component> sub_comp, u32 min, u32 max,
95                     RepeatType t);
96     ~ComponentRepeat() override;
97     ComponentRepeat *clone() const override;
98     Component *accept(ComponentVisitor &v) override;
99     void accept(ConstComponentVisitor &v) const override;
100 
101     std::vector<PositionInfo> first() const override;
102     std::vector<PositionInfo> last() const override;
103     bool empty() const override;
104     bool repeatable() const override;
105     bool vacuous_everywhere() const override;
106     void notePositions(GlushkovBuildState &bs) override;
107     void buildFollowSet(GlushkovBuildState &bs,
108                         const std::vector<PositionInfo> &lastPos) override;
109     bool checkEmbeddedStartAnchor(bool at_start) const override;
110     bool checkEmbeddedEndAnchor(bool at_end) const override;
111 
112     void optimise(bool connected_to_sds) override;
113 
getBounds()114     virtual std::pair<u32, u32> getBounds() const {
115         return std::make_pair(m_min, m_max);
116     }
117 
118     /** \brief From declared behaviour (not taking into account the
119      * sub-component).  */
120     enum RepeatType type;
121 
122 protected:
123     void postSubNotePositionHook();
124     void wireRepeats(GlushkovBuildState &bs);
125 
126     std::unique_ptr<Component> sub_comp;
127     u32 m_min;
128     u32 m_max;
129 
130     std::vector<std::vector<PositionInfo> > m_firsts;
131     std::vector<std::vector<PositionInfo> > m_lasts;
132     Position posFirst;
133     Position posLast;
134 
135     ComponentRepeat(const ComponentRepeat &other);
136 };
137 
138 std::unique_ptr<ComponentRepeat>
139 makeComponentRepeat(std::unique_ptr<Component> sub_comp, u32 min, u32 max,
140                     ComponentRepeat::RepeatType t);
141 
142 } // namespace ue2
143 
144 #endif // _RE_COMPONENTREPEAT_H_
145