1 /*
2  * Copyright (c) 2015, Intel Corporation
3  *
4  * Redistribution and use in source and binary forms, with or without
5  * modification, are permitted provided that the following conditions are met:
6  *
7  *  * Redistributions of source code must retain the above copyright notice,
8  *    this list of conditions and the following disclaimer.
9  *  * Redistributions in binary form must reproduce the above copyright
10  *    notice, this list of conditions and the following disclaimer in the
11  *    documentation and/or other materials provided with the distribution.
12  *  * Neither the name of Intel Corporation nor the names of its contributors
13  *    may be used to endorse or promote products derived from this software
14  *    without specific prior written permission.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
20  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26  * POSSIBILITY OF SUCH DAMAGE.
27  */
28 
29 /** \file
30  * \brief Boundary assertions (^, $, \\A, \\Z, \\z)
31  */
32 
33 
34 #include "ComponentBoundary.h"
35 
36 #include "buildstate.h"
37 #include "parse_error.h"
38 #include "position.h"
39 #include "position_info.h"
40 #include "Parser.h"
41 #include "util/charreach.h"
42 #include "nfagraph/ng_builder.h"
43 
44 #include <cassert>
45 
46 using namespace std;
47 
48 namespace ue2 {
49 
ComponentBoundary(enum Boundary bound)50 ComponentBoundary::ComponentBoundary(enum Boundary bound)
51     : m_bound(bound), m_newline(GlushkovBuildState::POS_UNINITIALIZED) {}
52 
~ComponentBoundary()53 ComponentBoundary::~ComponentBoundary() {
54 }
55 
/**
 * \brief Copy constructor.
 *
 * Copies the base Component sub-object, the boundary kind, the newline
 * position and both the first and last position lists from \p other.
 */
ComponentBoundary::ComponentBoundary(const ComponentBoundary &other)
    : Component(other),
      m_bound(other.m_bound),
      m_newline(other.m_newline),
      m_first(other.m_first),
      m_last(other.m_last) {
}
59 
clone() const60 ComponentBoundary * ComponentBoundary::clone() const {
61     return new ComponentBoundary(*this);
62 }
63 
first() const64 vector<PositionInfo> ComponentBoundary::first() const {
65     return m_first;
66 }
67 
last() const68 vector<PositionInfo> ComponentBoundary::last() const {
69     return m_last;
70 }
71 
empty() const72 bool ComponentBoundary::empty() const {
73     return true;
74 }
75 
repeatable() const76 bool ComponentBoundary::repeatable() const {
77     return false;
78 }
79 
80 static
makeNewline(GlushkovBuildState & bs)81 Position makeNewline(GlushkovBuildState &bs) {
82     NFABuilder &builder = bs.getBuilder();
83     Position newline = builder.makePositions(1);
84     builder.addCharReach(newline, CharReach('\n'));
85     return newline;
86 }
87 
notePositions(GlushkovBuildState & bs)88 void ComponentBoundary::notePositions(GlushkovBuildState & bs) {
89     NFABuilder &builder = bs.getBuilder();
90     const Position startState = builder.getStart();
91 
92     switch (m_bound) {
93     case BEGIN_STRING: // beginning of data stream ('^')
94     {
95         PositionInfo epsilon(GlushkovBuildState::POS_EPSILON);
96         epsilon.flags = POS_FLAG_NOFLOAT;
97         m_first.push_back(epsilon);
98 
99         // We have the start vertex in firsts so that we can discourage
100         // the mid-pattern use of boundaries.
101         m_first.push_back(startState);
102 
103         break;
104     }
105     case BEGIN_LINE: // multiline anchor: beginning of stream or a newline
106     {
107         PositionInfo epsilon(GlushkovBuildState::POS_EPSILON);
108         epsilon.flags = POS_FLAG_NOFLOAT;
109         m_first.push_back(epsilon);
110 
111         // We have the start vertex in firsts so that we can discourage
112         // the mid-pattern use of boundaries.
113         m_first.push_back(startState);
114 
115         // Newline
116         m_newline = makeNewline(bs);
117         builder.setAssertFlag(m_newline, POS_FLAG_MULTILINE_START);
118         builder.setAssertFlag(m_newline, POS_FLAG_VIRTUAL_START);
119         PositionInfo nl(m_newline);
120         nl.flags = POS_FLAG_MUST_FLOAT | POS_FLAG_FIDDLE_ACCEPT;
121         m_first.push_back(nl);
122         m_last.push_back(nl);
123         recordPosBounds(m_newline, m_newline + 1);
124         break;
125     }
126     case END_STRING: // end of data stream ('\z')
127     {
128         PositionInfo epsilon(GlushkovBuildState::POS_EPSILON);
129         epsilon.flags = POS_FLAG_WIRE_EOD | POS_FLAG_NO_NL_EOD |
130                         POS_FLAG_NO_NL_ACCEPT | POS_FLAG_ONLY_ENDS;
131         m_first.push_back(epsilon);
132         break;
133     }
134     case END_STRING_OPTIONAL_LF: // end of data with optional LF ('$')
135     {
136         PositionInfo epsilon(GlushkovBuildState::POS_EPSILON);
137         epsilon.flags = POS_FLAG_WIRE_EOD | POS_FLAG_WIRE_NL_EOD |
138                         POS_FLAG_NO_NL_ACCEPT | POS_FLAG_ONLY_ENDS;
139         m_first.push_back(epsilon);
140         break;
141     }
142     case END_LINE: // multiline anchor: end of data or a newline
143     {
144         PositionInfo epsilon(GlushkovBuildState::POS_EPSILON);
145         epsilon.flags = POS_FLAG_WIRE_EOD | POS_FLAG_WIRE_NL_EOD |
146                         POS_FLAG_WIRE_NL_ACCEPT | POS_FLAG_ONLY_ENDS;
147         m_first.push_back(epsilon);
148         break;
149     }
150     default:
151         // unsupported
152         assert(0);
153         break;
154     }
155 }
156 
buildFollowSet(GlushkovBuildState &,const vector<PositionInfo> &)157 void ComponentBoundary::buildFollowSet(GlushkovBuildState &,
158                                        const vector<PositionInfo> &) {
159 
160 }
161 
checkEmbeddedStartAnchor(bool at_start) const162 bool ComponentBoundary::checkEmbeddedStartAnchor(bool at_start) const {
163     if (at_start) {
164         return at_start;
165     }
166 
167     if (m_bound == BEGIN_STRING || m_bound == BEGIN_LINE) {
168         throw ParseError("Embedded start anchors not supported.");
169     }
170 
171     return at_start;
172 }
173 
checkEmbeddedEndAnchor(bool at_end) const174 bool ComponentBoundary::checkEmbeddedEndAnchor(bool at_end) const {
175     if (at_end) {
176         return at_end;
177     }
178 
179     if (m_bound != BEGIN_STRING && m_bound != BEGIN_LINE) {
180         throw ParseError("Embedded end anchors not supported.");
181     }
182 
183     return at_end;
184 }
185 
186 } // namespace
187