1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* This Source Code Form is subject to the terms of the Mozilla Public
3 * License, v. 2.0. If a copy of the MPL was not distributed with this
4 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
5
6 /**
7 * Lexical analyzer for XPath expressions
8 */
9
10 #include "txExprLexer.h"
11 #include "nsGkAtoms.h"
12 #include "nsString.h"
13 #include "nsError.h"
14 #include "txXMLUtils.h"
15
16 /**
17 * Creates a new ExprLexer
18 */
txExprLexer()19 txExprLexer::txExprLexer()
20 : mPosition(nullptr),
21 mCurrentItem(nullptr),
22 mFirstItem(nullptr),
23 mLastItem(nullptr),
24 mTokenCount(0) {}
25
26 /**
27 * Destroys this instance of an txExprLexer
28 */
~txExprLexer()29 txExprLexer::~txExprLexer() {
30 //-- delete tokens
31 Token* tok = mFirstItem;
32 while (tok) {
33 Token* temp = tok->mNext;
34 delete tok;
35 tok = temp;
36 }
37 mCurrentItem = nullptr;
38 }
39
nextToken()40 Token* txExprLexer::nextToken() {
41 if (!mCurrentItem) {
42 MOZ_ASSERT_UNREACHABLE("nextToken called on uninitialized lexer");
43 return nullptr;
44 }
45
46 if (mCurrentItem->mType == Token::END) {
47 // Do not progress beyond the end token
48 return mCurrentItem;
49 }
50
51 Token* token = mCurrentItem;
52 mCurrentItem = mCurrentItem->mNext;
53 return token;
54 }
55
addToken(Token * aToken)56 void txExprLexer::addToken(Token* aToken) {
57 if (mLastItem) {
58 mLastItem->mNext = aToken;
59 }
60 if (!mFirstItem) {
61 mFirstItem = aToken;
62 mCurrentItem = aToken;
63 }
64 mLastItem = aToken;
65 ++mTokenCount;
66 }
67
68 /**
69 * Returns true if the following Token should be an operator.
70 * This is a helper for the first bullet of [XPath 3.7]
71 * Lexical Structure
72 */
nextIsOperatorToken(Token * aToken)73 bool txExprLexer::nextIsOperatorToken(Token* aToken) {
74 if (!aToken || aToken->mType == Token::NULL_TOKEN) {
75 return false;
76 }
77 /* This relies on the tokens having the right order in txExprLexer.h */
78 return aToken->mType < Token::COMMA || aToken->mType > Token::UNION_OP;
79 }
80
81 /**
82 * Parses the given string into a sequence of Tokens
83 */
parse(const nsAString & aPattern)84 nsresult txExprLexer::parse(const nsAString& aPattern) {
85 iterator end;
86 aPattern.BeginReading(mPosition);
87 aPattern.EndReading(end);
88
89 //-- initialize previous token, this will automatically get
90 //-- deleted when it goes out of scope
91 Token nullToken(nullptr, nullptr, Token::NULL_TOKEN);
92
93 Token::Type defType;
94 Token* newToken = nullptr;
95 Token* prevToken = &nullToken;
96 bool isToken;
97
98 while (mPosition < end) {
99 defType = Token::CNAME;
100 isToken = true;
101
102 if (*mPosition == DOLLAR_SIGN) {
103 if (++mPosition == end || !XMLUtils::isLetter(*mPosition)) {
104 return NS_ERROR_XPATH_INVALID_VAR_NAME;
105 }
106 defType = Token::VAR_REFERENCE;
107 }
108 // just reuse the QName parsing, which will use defType
109 // the token to construct
110
111 if (XMLUtils::isLetter(*mPosition)) {
112 // NCName, can get QName or OperatorName;
113 // FunctionName, NodeName, and AxisSpecifier may want whitespace,
114 // and are dealt with below
115 iterator start = mPosition;
116 while (++mPosition < end && XMLUtils::isNCNameChar(*mPosition)) {
117 /* just go */
118 }
119 if (mPosition < end && *mPosition == COLON) {
120 // try QName or wildcard, might need to step back for axis
121 if (++mPosition == end) {
122 return NS_ERROR_XPATH_UNEXPECTED_END;
123 }
124 if (XMLUtils::isLetter(*mPosition)) {
125 while (++mPosition < end && XMLUtils::isNCNameChar(*mPosition)) {
126 /* just go */
127 }
128 } else if (*mPosition == '*' && defType != Token::VAR_REFERENCE) {
129 // eat wildcard for NameTest, bail for var ref at COLON
130 ++mPosition;
131 } else {
132 --mPosition; // step back
133 }
134 }
135 if (nextIsOperatorToken(prevToken)) {
136 nsDependentSubstring op(Substring(start, mPosition));
137 if (nsGkAtoms::_and->Equals(op)) {
138 defType = Token::AND_OP;
139 } else if (nsGkAtoms::_or->Equals(op)) {
140 defType = Token::OR_OP;
141 } else if (nsGkAtoms::mod->Equals(op)) {
142 defType = Token::MODULUS_OP;
143 } else if (nsGkAtoms::div->Equals(op)) {
144 defType = Token::DIVIDE_OP;
145 } else {
146 // XXX QUESTION: spec is not too precise
147 // badops is sure an error, but is bad:ops, too? We say yes!
148 return NS_ERROR_XPATH_OPERATOR_EXPECTED;
149 }
150 }
151 newToken = new Token(start, mPosition, defType);
152 } else if (isXPathDigit(*mPosition)) {
153 iterator start = mPosition;
154 while (++mPosition < end && isXPathDigit(*mPosition)) {
155 /* just go */
156 }
157 if (mPosition < end && *mPosition == '.') {
158 while (++mPosition < end && isXPathDigit(*mPosition)) {
159 /* just go */
160 }
161 }
162 newToken = new Token(start, mPosition, Token::NUMBER);
163 } else {
164 switch (*mPosition) {
165 //-- ignore whitespace
166 case SPACE:
167 case TX_TAB:
168 case TX_CR:
169 case TX_LF:
170 ++mPosition;
171 isToken = false;
172 break;
173 case S_QUOTE:
174 case D_QUOTE: {
175 iterator start = mPosition;
176 while (++mPosition < end && *mPosition != *start) {
177 // eat literal
178 }
179 if (mPosition == end) {
180 mPosition = start;
181 return NS_ERROR_XPATH_UNCLOSED_LITERAL;
182 }
183 newToken = new Token(start + 1, mPosition, Token::LITERAL);
184 ++mPosition;
185 } break;
186 case PERIOD:
187 // period can be .., .(DIGITS)+ or ., check next
188 if (++mPosition == end) {
189 newToken = new Token(mPosition - 1, Token::SELF_NODE);
190 } else if (isXPathDigit(*mPosition)) {
191 iterator start = mPosition - 1;
192 while (++mPosition < end && isXPathDigit(*mPosition)) {
193 /* just go */
194 }
195 newToken = new Token(start, mPosition, Token::NUMBER);
196 } else if (*mPosition == PERIOD) {
197 ++mPosition;
198 newToken = new Token(mPosition - 2, mPosition, Token::PARENT_NODE);
199 } else {
200 newToken = new Token(mPosition - 1, Token::SELF_NODE);
201 }
202 break;
203 case COLON: // QNames are dealt above, must be axis ident
204 if (++mPosition >= end || *mPosition != COLON ||
205 prevToken->mType != Token::CNAME) {
206 return NS_ERROR_XPATH_BAD_COLON;
207 }
208 prevToken->mType = Token::AXIS_IDENTIFIER;
209 ++mPosition;
210 isToken = false;
211 break;
212 case FORWARD_SLASH:
213 if (++mPosition < end && *mPosition == FORWARD_SLASH) {
214 ++mPosition;
215 newToken = new Token(mPosition - 2, mPosition, Token::ANCESTOR_OP);
216 } else {
217 newToken = new Token(mPosition - 1, Token::PARENT_OP);
218 }
219 break;
220 case BANG: // can only be !=
221 if (++mPosition < end && *mPosition == EQUAL) {
222 ++mPosition;
223 newToken = new Token(mPosition - 2, mPosition, Token::NOT_EQUAL_OP);
224 break;
225 }
226 // Error ! is not not()
227 return NS_ERROR_XPATH_BAD_BANG;
228 case EQUAL:
229 newToken = new Token(mPosition, Token::EQUAL_OP);
230 ++mPosition;
231 break;
232 case L_ANGLE:
233 if (++mPosition == end) {
234 return NS_ERROR_XPATH_UNEXPECTED_END;
235 }
236 if (*mPosition == EQUAL) {
237 ++mPosition;
238 newToken =
239 new Token(mPosition - 2, mPosition, Token::LESS_OR_EQUAL_OP);
240 } else {
241 newToken = new Token(mPosition - 1, Token::LESS_THAN_OP);
242 }
243 break;
244 case R_ANGLE:
245 if (++mPosition == end) {
246 return NS_ERROR_XPATH_UNEXPECTED_END;
247 }
248 if (*mPosition == EQUAL) {
249 ++mPosition;
250 newToken =
251 new Token(mPosition - 2, mPosition, Token::GREATER_OR_EQUAL_OP);
252 } else {
253 newToken = new Token(mPosition - 1, Token::GREATER_THAN_OP);
254 }
255 break;
256 case HYPHEN:
257 newToken = new Token(mPosition, Token::SUBTRACTION_OP);
258 ++mPosition;
259 break;
260 case ASTERISK:
261 if (nextIsOperatorToken(prevToken)) {
262 newToken = new Token(mPosition, Token::MULTIPLY_OP);
263 } else {
264 newToken = new Token(mPosition, Token::CNAME);
265 }
266 ++mPosition;
267 break;
268 case L_PAREN:
269 if (prevToken->mType == Token::CNAME) {
270 const nsDependentSubstring& val = prevToken->Value();
271 if (val.EqualsLiteral("comment")) {
272 prevToken->mType = Token::COMMENT_AND_PAREN;
273 } else if (val.EqualsLiteral("node")) {
274 prevToken->mType = Token::NODE_AND_PAREN;
275 } else if (val.EqualsLiteral("processing-instruction")) {
276 prevToken->mType = Token::PROC_INST_AND_PAREN;
277 } else if (val.EqualsLiteral("text")) {
278 prevToken->mType = Token::TEXT_AND_PAREN;
279 } else {
280 prevToken->mType = Token::FUNCTION_NAME_AND_PAREN;
281 }
282 isToken = false;
283 } else {
284 newToken = new Token(mPosition, Token::L_PAREN);
285 }
286 ++mPosition;
287 break;
288 case R_PAREN:
289 newToken = new Token(mPosition, Token::R_PAREN);
290 ++mPosition;
291 break;
292 case L_BRACKET:
293 newToken = new Token(mPosition, Token::L_BRACKET);
294 ++mPosition;
295 break;
296 case R_BRACKET:
297 newToken = new Token(mPosition, Token::R_BRACKET);
298 ++mPosition;
299 break;
300 case COMMA:
301 newToken = new Token(mPosition, Token::COMMA);
302 ++mPosition;
303 break;
304 case AT_SIGN:
305 newToken = new Token(mPosition, Token::AT_SIGN);
306 ++mPosition;
307 break;
308 case PLUS:
309 newToken = new Token(mPosition, Token::ADDITION_OP);
310 ++mPosition;
311 break;
312 case VERT_BAR:
313 newToken = new Token(mPosition, Token::UNION_OP);
314 ++mPosition;
315 break;
316 default:
317 // Error, don't grok character :-(
318 return NS_ERROR_XPATH_ILLEGAL_CHAR;
319 }
320 }
321 if (isToken) {
322 NS_ENSURE_TRUE(newToken != mLastItem, NS_ERROR_FAILURE);
323 prevToken = newToken;
324 addToken(newToken);
325 }
326 }
327
328 // add a endToken to the list
329 newToken = new Token(end, end, Token::END);
330 addToken(newToken);
331
332 return NS_OK;
333 }
334