1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* This Source Code Form is subject to the terms of the Mozilla Public
3  * License, v. 2.0. If a copy of the MPL was not distributed with this
4  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
5 
6 /**
7  * Lexical analyzer for XPath expressions
8  */
9 
10 #include "txExprLexer.h"
11 #include "nsGkAtoms.h"
12 #include "nsString.h"
13 #include "nsError.h"
14 #include "txXMLUtils.h"
15 
16 /**
17  * Creates a new ExprLexer
18  */
txExprLexer()19 txExprLexer::txExprLexer()
20     : mPosition(nullptr),
21       mCurrentItem(nullptr),
22       mFirstItem(nullptr),
23       mLastItem(nullptr),
24       mTokenCount(0) {}
25 
26 /**
27  * Destroys this instance of an txExprLexer
28  */
~txExprLexer()29 txExprLexer::~txExprLexer() {
30   //-- delete tokens
31   Token* tok = mFirstItem;
32   while (tok) {
33     Token* temp = tok->mNext;
34     delete tok;
35     tok = temp;
36   }
37   mCurrentItem = nullptr;
38 }
39 
nextToken()40 Token* txExprLexer::nextToken() {
41   if (!mCurrentItem) {
42     MOZ_ASSERT_UNREACHABLE("nextToken called on uninitialized lexer");
43     return nullptr;
44   }
45 
46   if (mCurrentItem->mType == Token::END) {
47     // Do not progress beyond the end token
48     return mCurrentItem;
49   }
50 
51   Token* token = mCurrentItem;
52   mCurrentItem = mCurrentItem->mNext;
53   return token;
54 }
55 
addToken(Token * aToken)56 void txExprLexer::addToken(Token* aToken) {
57   if (mLastItem) {
58     mLastItem->mNext = aToken;
59   }
60   if (!mFirstItem) {
61     mFirstItem = aToken;
62     mCurrentItem = aToken;
63   }
64   mLastItem = aToken;
65   ++mTokenCount;
66 }
67 
68 /**
69  * Returns true if the following Token should be an operator.
70  * This is a helper for the first bullet of [XPath 3.7]
71  *  Lexical Structure
72  */
nextIsOperatorToken(Token * aToken)73 bool txExprLexer::nextIsOperatorToken(Token* aToken) {
74   if (!aToken || aToken->mType == Token::NULL_TOKEN) {
75     return false;
76   }
77   /* This relies on the tokens having the right order in txExprLexer.h */
78   return aToken->mType < Token::COMMA || aToken->mType > Token::UNION_OP;
79 }
80 
81 /**
82  * Parses the given string into a sequence of Tokens
83  */
parse(const nsAString & aPattern)84 nsresult txExprLexer::parse(const nsAString& aPattern) {
85   iterator end;
86   aPattern.BeginReading(mPosition);
87   aPattern.EndReading(end);
88 
89   //-- initialize previous token, this will automatically get
90   //-- deleted when it goes out of scope
91   Token nullToken(nullptr, nullptr, Token::NULL_TOKEN);
92 
93   Token::Type defType;
94   Token* newToken = nullptr;
95   Token* prevToken = &nullToken;
96   bool isToken;
97 
98   while (mPosition < end) {
99     defType = Token::CNAME;
100     isToken = true;
101 
102     if (*mPosition == DOLLAR_SIGN) {
103       if (++mPosition == end || !XMLUtils::isLetter(*mPosition)) {
104         return NS_ERROR_XPATH_INVALID_VAR_NAME;
105       }
106       defType = Token::VAR_REFERENCE;
107     }
108     // just reuse the QName parsing, which will use defType
109     // the token to construct
110 
111     if (XMLUtils::isLetter(*mPosition)) {
112       // NCName, can get QName or OperatorName;
113       //  FunctionName, NodeName, and AxisSpecifier may want whitespace,
114       //  and are dealt with below
115       iterator start = mPosition;
116       while (++mPosition < end && XMLUtils::isNCNameChar(*mPosition)) {
117         /* just go */
118       }
119       if (mPosition < end && *mPosition == COLON) {
120         // try QName or wildcard, might need to step back for axis
121         if (++mPosition == end) {
122           return NS_ERROR_XPATH_UNEXPECTED_END;
123         }
124         if (XMLUtils::isLetter(*mPosition)) {
125           while (++mPosition < end && XMLUtils::isNCNameChar(*mPosition)) {
126             /* just go */
127           }
128         } else if (*mPosition == '*' && defType != Token::VAR_REFERENCE) {
129           // eat wildcard for NameTest, bail for var ref at COLON
130           ++mPosition;
131         } else {
132           --mPosition;  // step back
133         }
134       }
135       if (nextIsOperatorToken(prevToken)) {
136         nsDependentSubstring op(Substring(start, mPosition));
137         if (nsGkAtoms::_and->Equals(op)) {
138           defType = Token::AND_OP;
139         } else if (nsGkAtoms::_or->Equals(op)) {
140           defType = Token::OR_OP;
141         } else if (nsGkAtoms::mod->Equals(op)) {
142           defType = Token::MODULUS_OP;
143         } else if (nsGkAtoms::div->Equals(op)) {
144           defType = Token::DIVIDE_OP;
145         } else {
146           // XXX QUESTION: spec is not too precise
147           // badops is sure an error, but is bad:ops, too? We say yes!
148           return NS_ERROR_XPATH_OPERATOR_EXPECTED;
149         }
150       }
151       newToken = new Token(start, mPosition, defType);
152     } else if (isXPathDigit(*mPosition)) {
153       iterator start = mPosition;
154       while (++mPosition < end && isXPathDigit(*mPosition)) {
155         /* just go */
156       }
157       if (mPosition < end && *mPosition == '.') {
158         while (++mPosition < end && isXPathDigit(*mPosition)) {
159           /* just go */
160         }
161       }
162       newToken = new Token(start, mPosition, Token::NUMBER);
163     } else {
164       switch (*mPosition) {
165           //-- ignore whitespace
166         case SPACE:
167         case TX_TAB:
168         case TX_CR:
169         case TX_LF:
170           ++mPosition;
171           isToken = false;
172           break;
173         case S_QUOTE:
174         case D_QUOTE: {
175           iterator start = mPosition;
176           while (++mPosition < end && *mPosition != *start) {
177             // eat literal
178           }
179           if (mPosition == end) {
180             mPosition = start;
181             return NS_ERROR_XPATH_UNCLOSED_LITERAL;
182           }
183           newToken = new Token(start + 1, mPosition, Token::LITERAL);
184           ++mPosition;
185         } break;
186         case PERIOD:
187           // period can be .., .(DIGITS)+ or ., check next
188           if (++mPosition == end) {
189             newToken = new Token(mPosition - 1, Token::SELF_NODE);
190           } else if (isXPathDigit(*mPosition)) {
191             iterator start = mPosition - 1;
192             while (++mPosition < end && isXPathDigit(*mPosition)) {
193               /* just go */
194             }
195             newToken = new Token(start, mPosition, Token::NUMBER);
196           } else if (*mPosition == PERIOD) {
197             ++mPosition;
198             newToken = new Token(mPosition - 2, mPosition, Token::PARENT_NODE);
199           } else {
200             newToken = new Token(mPosition - 1, Token::SELF_NODE);
201           }
202           break;
203         case COLON:  // QNames are dealt above, must be axis ident
204           if (++mPosition >= end || *mPosition != COLON ||
205               prevToken->mType != Token::CNAME) {
206             return NS_ERROR_XPATH_BAD_COLON;
207           }
208           prevToken->mType = Token::AXIS_IDENTIFIER;
209           ++mPosition;
210           isToken = false;
211           break;
212         case FORWARD_SLASH:
213           if (++mPosition < end && *mPosition == FORWARD_SLASH) {
214             ++mPosition;
215             newToken = new Token(mPosition - 2, mPosition, Token::ANCESTOR_OP);
216           } else {
217             newToken = new Token(mPosition - 1, Token::PARENT_OP);
218           }
219           break;
220         case BANG:  // can only be !=
221           if (++mPosition < end && *mPosition == EQUAL) {
222             ++mPosition;
223             newToken = new Token(mPosition - 2, mPosition, Token::NOT_EQUAL_OP);
224             break;
225           }
226           // Error ! is not not()
227           return NS_ERROR_XPATH_BAD_BANG;
228         case EQUAL:
229           newToken = new Token(mPosition, Token::EQUAL_OP);
230           ++mPosition;
231           break;
232         case L_ANGLE:
233           if (++mPosition == end) {
234             return NS_ERROR_XPATH_UNEXPECTED_END;
235           }
236           if (*mPosition == EQUAL) {
237             ++mPosition;
238             newToken =
239                 new Token(mPosition - 2, mPosition, Token::LESS_OR_EQUAL_OP);
240           } else {
241             newToken = new Token(mPosition - 1, Token::LESS_THAN_OP);
242           }
243           break;
244         case R_ANGLE:
245           if (++mPosition == end) {
246             return NS_ERROR_XPATH_UNEXPECTED_END;
247           }
248           if (*mPosition == EQUAL) {
249             ++mPosition;
250             newToken =
251                 new Token(mPosition - 2, mPosition, Token::GREATER_OR_EQUAL_OP);
252           } else {
253             newToken = new Token(mPosition - 1, Token::GREATER_THAN_OP);
254           }
255           break;
256         case HYPHEN:
257           newToken = new Token(mPosition, Token::SUBTRACTION_OP);
258           ++mPosition;
259           break;
260         case ASTERISK:
261           if (nextIsOperatorToken(prevToken)) {
262             newToken = new Token(mPosition, Token::MULTIPLY_OP);
263           } else {
264             newToken = new Token(mPosition, Token::CNAME);
265           }
266           ++mPosition;
267           break;
268         case L_PAREN:
269           if (prevToken->mType == Token::CNAME) {
270             const nsDependentSubstring& val = prevToken->Value();
271             if (val.EqualsLiteral("comment")) {
272               prevToken->mType = Token::COMMENT_AND_PAREN;
273             } else if (val.EqualsLiteral("node")) {
274               prevToken->mType = Token::NODE_AND_PAREN;
275             } else if (val.EqualsLiteral("processing-instruction")) {
276               prevToken->mType = Token::PROC_INST_AND_PAREN;
277             } else if (val.EqualsLiteral("text")) {
278               prevToken->mType = Token::TEXT_AND_PAREN;
279             } else {
280               prevToken->mType = Token::FUNCTION_NAME_AND_PAREN;
281             }
282             isToken = false;
283           } else {
284             newToken = new Token(mPosition, Token::L_PAREN);
285           }
286           ++mPosition;
287           break;
288         case R_PAREN:
289           newToken = new Token(mPosition, Token::R_PAREN);
290           ++mPosition;
291           break;
292         case L_BRACKET:
293           newToken = new Token(mPosition, Token::L_BRACKET);
294           ++mPosition;
295           break;
296         case R_BRACKET:
297           newToken = new Token(mPosition, Token::R_BRACKET);
298           ++mPosition;
299           break;
300         case COMMA:
301           newToken = new Token(mPosition, Token::COMMA);
302           ++mPosition;
303           break;
304         case AT_SIGN:
305           newToken = new Token(mPosition, Token::AT_SIGN);
306           ++mPosition;
307           break;
308         case PLUS:
309           newToken = new Token(mPosition, Token::ADDITION_OP);
310           ++mPosition;
311           break;
312         case VERT_BAR:
313           newToken = new Token(mPosition, Token::UNION_OP);
314           ++mPosition;
315           break;
316         default:
317           // Error, don't grok character :-(
318           return NS_ERROR_XPATH_ILLEGAL_CHAR;
319       }
320     }
321     if (isToken) {
322       NS_ENSURE_TRUE(newToken != mLastItem, NS_ERROR_FAILURE);
323       prevToken = newToken;
324       addToken(newToken);
325     }
326   }
327 
328   // add a endToken to the list
329   newToken = new Token(end, end, Token::END);
330   addToken(newToken);
331 
332   return NS_OK;
333 }
334