1 /*
2 * Copyright 2005 Maksim Orlovich <maksim@kde.org>
3 * Copyright (C) 2006 Apple Computer, Inc.
4 * Copyright (C) 2007 Alexey Proskuryakov <ap@webkit.org>
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 *
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
17 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
19 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
20 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
21 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
22 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
23 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
25 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 */
27
28 #include "config.h"
29 #include "XPathParser.h"
30
31 #if ENABLE(XPATH)
32
33 #include "ExceptionCode.h"
34 #include "XPathEvaluator.h"
35 #include "XPathException.h"
36 #include "XPathNSResolver.h"
37 #include "XPathStep.h"
38 #include <wtf/StdLibExtras.h>
39 #include <wtf/text/StringHash.h>
40
41 int xpathyyparse(void*);
42
43 using namespace WTF;
44 using namespace Unicode;
45
46 namespace WebCore {
47 namespace XPath {
48
49 class LocationPath;
50
51 #include "XPathGrammar.h"
52
53 Parser* Parser::currentParser = 0;
54
55 enum XMLCat { NameStart, NameCont, NotPartOfName };
56
57 typedef HashMap<String, Step::Axis> AxisNamesMap;
58
charCat(UChar aChar)59 static XMLCat charCat(UChar aChar)
60 {
61 //### might need to add some special cases from the XML spec.
62
63 if (aChar == '_')
64 return NameStart;
65
66 if (aChar == '.' || aChar == '-')
67 return NameCont;
68 CharCategory category = Unicode::category(aChar);
69 if (category & (Letter_Uppercase | Letter_Lowercase | Letter_Other | Letter_Titlecase | Number_Letter))
70 return NameStart;
71 if (category & (Mark_NonSpacing | Mark_SpacingCombining | Mark_Enclosing | Letter_Modifier | Number_DecimalDigit))
72 return NameCont;
73 return NotPartOfName;
74 }
75
setUpAxisNamesMap(AxisNamesMap & axisNames)76 static void setUpAxisNamesMap(AxisNamesMap& axisNames)
77 {
78 struct AxisName {
79 const char* name;
80 Step::Axis axis;
81 };
82 const AxisName axisNameList[] = {
83 { "ancestor", Step::AncestorAxis },
84 { "ancestor-or-self", Step::AncestorOrSelfAxis },
85 { "attribute", Step::AttributeAxis },
86 { "child", Step::ChildAxis },
87 { "descendant", Step::DescendantAxis },
88 { "descendant-or-self", Step::DescendantOrSelfAxis },
89 { "following", Step::FollowingAxis },
90 { "following-sibling", Step::FollowingSiblingAxis },
91 { "namespace", Step::NamespaceAxis },
92 { "parent", Step::ParentAxis },
93 { "preceding", Step::PrecedingAxis },
94 { "preceding-sibling", Step::PrecedingSiblingAxis },
95 { "self", Step::SelfAxis }
96 };
97 for (unsigned i = 0; i < sizeof(axisNameList) / sizeof(axisNameList[0]); ++i)
98 axisNames.set(axisNameList[i].name, axisNameList[i].axis);
99 }
100
isAxisName(const String & name,Step::Axis & type)101 static bool isAxisName(const String& name, Step::Axis& type)
102 {
103 DEFINE_STATIC_LOCAL(AxisNamesMap, axisNames, ());
104
105 if (axisNames.isEmpty())
106 setUpAxisNamesMap(axisNames);
107
108 AxisNamesMap::iterator it = axisNames.find(name);
109 if (it == axisNames.end())
110 return false;
111 type = it->second;
112 return true;
113 }
114
isNodeTypeName(const String & name)115 static bool isNodeTypeName(const String& name)
116 {
117 DEFINE_STATIC_LOCAL(HashSet<String>, nodeTypeNames, ());
118 if (nodeTypeNames.isEmpty()) {
119 nodeTypeNames.add("comment");
120 nodeTypeNames.add("text");
121 nodeTypeNames.add("processing-instruction");
122 nodeTypeNames.add("node");
123 }
124 return nodeTypeNames.contains(name);
125 }
126
127 // Returns whether the current token can possibly be a binary operator, given
128 // the previous token. Necessary to disambiguate some of the operators
129 // (* (multiply), div, and, or, mod) in the [32] Operator rule
130 // (check http://www.w3.org/TR/xpath#exprlex).
isBinaryOperatorContext() const131 bool Parser::isBinaryOperatorContext() const
132 {
133 switch (m_lastTokenType) {
134 case 0:
135 case '@': case AXISNAME: case '(': case '[': case ',':
136 case AND: case OR: case MULOP:
137 case '/': case SLASHSLASH: case '|': case PLUS: case MINUS:
138 case EQOP: case RELOP:
139 return false;
140 default:
141 return true;
142 }
143 }
144
skipWS()145 void Parser::skipWS()
146 {
147 while (m_nextPos < m_data.length() && isSpaceOrNewline(m_data[m_nextPos]))
148 ++m_nextPos;
149 }
150
makeTokenAndAdvance(int code,int advance)151 Token Parser::makeTokenAndAdvance(int code, int advance)
152 {
153 m_nextPos += advance;
154 return Token(code);
155 }
156
makeTokenAndAdvance(int code,NumericOp::Opcode val,int advance)157 Token Parser::makeTokenAndAdvance(int code, NumericOp::Opcode val, int advance)
158 {
159 m_nextPos += advance;
160 return Token(code, val);
161 }
162
makeTokenAndAdvance(int code,EqTestOp::Opcode val,int advance)163 Token Parser::makeTokenAndAdvance(int code, EqTestOp::Opcode val, int advance)
164 {
165 m_nextPos += advance;
166 return Token(code, val);
167 }
168
169 // Returns next char if it's there and interesting, 0 otherwise
peekAheadHelper()170 char Parser::peekAheadHelper()
171 {
172 if (m_nextPos + 1 >= m_data.length())
173 return 0;
174 UChar next = m_data[m_nextPos + 1];
175 if (next >= 0xff)
176 return 0;
177 return next;
178 }
179
peekCurHelper()180 char Parser::peekCurHelper()
181 {
182 if (m_nextPos >= m_data.length())
183 return 0;
184 UChar next = m_data[m_nextPos];
185 if (next >= 0xff)
186 return 0;
187 return next;
188 }
189
lexString()190 Token Parser::lexString()
191 {
192 UChar delimiter = m_data[m_nextPos];
193 int startPos = m_nextPos + 1;
194
195 for (m_nextPos = startPos; m_nextPos < m_data.length(); ++m_nextPos) {
196 if (m_data[m_nextPos] == delimiter) {
197 String value = m_data.substring(startPos, m_nextPos - startPos);
198 if (value.isNull())
199 value = "";
200 ++m_nextPos; // Consume the char.
201 return Token(LITERAL, value);
202 }
203 }
204
205 // Ouch, went off the end -- report error.
206 return Token(XPATH_ERROR);
207 }
208
lexNumber()209 Token Parser::lexNumber()
210 {
211 int startPos = m_nextPos;
212 bool seenDot = false;
213
214 // Go until end or a non-digits character.
215 for (; m_nextPos < m_data.length(); ++m_nextPos) {
216 UChar aChar = m_data[m_nextPos];
217 if (aChar >= 0xff) break;
218
219 if (aChar < '0' || aChar > '9') {
220 if (aChar == '.' && !seenDot)
221 seenDot = true;
222 else
223 break;
224 }
225 }
226
227 return Token(NUMBER, m_data.substring(startPos, m_nextPos - startPos));
228 }
229
lexNCName(String & name)230 bool Parser::lexNCName(String& name)
231 {
232 int startPos = m_nextPos;
233 if (m_nextPos >= m_data.length())
234 return false;
235
236 if (charCat(m_data[m_nextPos]) != NameStart)
237 return false;
238
239 // Keep going until we get a character that's not good for names.
240 for (; m_nextPos < m_data.length(); ++m_nextPos)
241 if (charCat(m_data[m_nextPos]) == NotPartOfName)
242 break;
243
244 name = m_data.substring(startPos, m_nextPos - startPos);
245 return true;
246 }
247
lexQName(String & name)248 bool Parser::lexQName(String& name)
249 {
250 String n1;
251 if (!lexNCName(n1))
252 return false;
253
254 skipWS();
255
256 // If the next character is :, what we just got it the prefix, if not,
257 // it's the whole thing.
258 if (peekAheadHelper() != ':') {
259 name = n1;
260 return true;
261 }
262
263 String n2;
264 if (!lexNCName(n2))
265 return false;
266
267 name = n1 + ":" + n2;
268 return true;
269 }
270
nextTokenInternal()271 Token Parser::nextTokenInternal()
272 {
273 skipWS();
274
275 if (m_nextPos >= m_data.length())
276 return Token(0);
277
278 char code = peekCurHelper();
279 switch (code) {
280 case '(': case ')': case '[': case ']':
281 case '@': case ',': case '|':
282 return makeTokenAndAdvance(code);
283 case '\'':
284 case '\"':
285 return lexString();
286 case '0': case '1': case '2': case '3': case '4':
287 case '5': case '6': case '7': case '8': case '9':
288 return lexNumber();
289 case '.': {
290 char next = peekAheadHelper();
291 if (next == '.')
292 return makeTokenAndAdvance(DOTDOT, 2);
293 if (next >= '0' && next <= '9')
294 return lexNumber();
295 return makeTokenAndAdvance('.');
296 }
297 case '/':
298 if (peekAheadHelper() == '/')
299 return makeTokenAndAdvance(SLASHSLASH, 2);
300 return makeTokenAndAdvance('/');
301 case '+':
302 return makeTokenAndAdvance(PLUS);
303 case '-':
304 return makeTokenAndAdvance(MINUS);
305 case '=':
306 return makeTokenAndAdvance(EQOP, EqTestOp::OP_EQ);
307 case '!':
308 if (peekAheadHelper() == '=')
309 return makeTokenAndAdvance(EQOP, EqTestOp::OP_NE, 2);
310 return Token(XPATH_ERROR);
311 case '<':
312 if (peekAheadHelper() == '=')
313 return makeTokenAndAdvance(RELOP, EqTestOp::OP_LE, 2);
314 return makeTokenAndAdvance(RELOP, EqTestOp::OP_LT);
315 case '>':
316 if (peekAheadHelper() == '=')
317 return makeTokenAndAdvance(RELOP, EqTestOp::OP_GE, 2);
318 return makeTokenAndAdvance(RELOP, EqTestOp::OP_GT);
319 case '*':
320 if (isBinaryOperatorContext())
321 return makeTokenAndAdvance(MULOP, NumericOp::OP_Mul);
322 ++m_nextPos;
323 return Token(NAMETEST, "*");
324 case '$': { // $ QName
325 m_nextPos++;
326 String name;
327 if (!lexQName(name))
328 return Token(XPATH_ERROR);
329 return Token(VARIABLEREFERENCE, name);
330 }
331 }
332
333 String name;
334 if (!lexNCName(name))
335 return Token(XPATH_ERROR);
336
337 skipWS();
338 // If we're in an operator context, check for any operator names
339 if (isBinaryOperatorContext()) {
340 if (name == "and") //### hash?
341 return Token(AND);
342 if (name == "or")
343 return Token(OR);
344 if (name == "mod")
345 return Token(MULOP, NumericOp::OP_Mod);
346 if (name == "div")
347 return Token(MULOP, NumericOp::OP_Div);
348 }
349
350 // See whether we are at a :
351 if (peekCurHelper() == ':') {
352 m_nextPos++;
353 // Any chance it's an axis name?
354 if (peekCurHelper() == ':') {
355 m_nextPos++;
356
357 //It might be an axis name.
358 Step::Axis axis;
359 if (isAxisName(name, axis))
360 return Token(AXISNAME, axis);
361 // Ugh, :: is only valid in axis names -> error
362 return Token(XPATH_ERROR);
363 }
364
365 // Seems like this is a fully qualified qname, or perhaps the * modified one from NameTest
366 skipWS();
367 if (peekCurHelper() == '*') {
368 m_nextPos++;
369 return Token(NAMETEST, name + ":*");
370 }
371
372 // Make a full qname.
373 String n2;
374 if (!lexNCName(n2))
375 return Token(XPATH_ERROR);
376
377 name = name + ":" + n2;
378 }
379
380 skipWS();
381 if (peekCurHelper() == '(') {
382 //note: we don't swallow the (here!
383
384 //either node type of function name
385 if (isNodeTypeName(name)) {
386 if (name == "processing-instruction")
387 return Token(PI, name);
388
389 return Token(NODETYPE, name);
390 }
391 //must be a function name.
392 return Token(FUNCTIONNAME, name);
393 }
394
395 // At this point, it must be NAMETEST.
396 return Token(NAMETEST, name);
397 }
398
nextToken()399 Token Parser::nextToken()
400 {
401 Token toRet = nextTokenInternal();
402 m_lastTokenType = toRet.type;
403 return toRet;
404 }
405
Parser()406 Parser::Parser()
407 {
408 reset(String());
409 }
410
~Parser()411 Parser::~Parser()
412 {
413 }
414
reset(const String & data)415 void Parser::reset(const String& data)
416 {
417 m_nextPos = 0;
418 m_data = data;
419 m_lastTokenType = 0;
420
421 m_topExpr = 0;
422 m_gotNamespaceError = false;
423 }
424
lex(void * data)425 int Parser::lex(void* data)
426 {
427 YYSTYPE* yylval = static_cast<YYSTYPE*>(data);
428 Token tok = nextToken();
429
430 switch (tok.type) {
431 case AXISNAME:
432 yylval->axis = tok.axis;
433 break;
434 case MULOP:
435 yylval->numop = tok.numop;
436 break;
437 case RELOP:
438 case EQOP:
439 yylval->eqop = tok.eqop;
440 break;
441 case NODETYPE:
442 case PI:
443 case FUNCTIONNAME:
444 case LITERAL:
445 case VARIABLEREFERENCE:
446 case NUMBER:
447 case NAMETEST:
448 yylval->str = new String(tok.str);
449 registerString(yylval->str);
450 break;
451 }
452
453 return tok.type;
454 }
455
expandQName(const String & qName,String & localName,String & namespaceURI)456 bool Parser::expandQName(const String& qName, String& localName, String& namespaceURI)
457 {
458 size_t colon = qName.find(':');
459 if (colon != notFound) {
460 if (!m_resolver)
461 return false;
462 namespaceURI = m_resolver->lookupNamespaceURI(qName.left(colon));
463 if (namespaceURI.isNull())
464 return false;
465 localName = qName.substring(colon + 1);
466 } else
467 localName = qName;
468
469 return true;
470 }
471
parseStatement(const String & statement,PassRefPtr<XPathNSResolver> resolver,ExceptionCode & ec)472 Expression* Parser::parseStatement(const String& statement, PassRefPtr<XPathNSResolver> resolver, ExceptionCode& ec)
473 {
474 reset(statement);
475
476 m_resolver = resolver;
477
478 Parser* oldParser = currentParser;
479 currentParser = this;
480 int parseError = xpathyyparse(this);
481 currentParser = oldParser;
482
483 if (parseError) {
484 deleteAllValues(m_parseNodes);
485 m_parseNodes.clear();
486
487 HashSet<Vector<Predicate*>*>::iterator pend = m_predicateVectors.end();
488 for (HashSet<Vector<Predicate*>*>::iterator it = m_predicateVectors.begin(); it != pend; ++it) {
489 deleteAllValues(**it);
490 delete *it;
491 }
492 m_predicateVectors.clear();
493
494 HashSet<Vector<Expression*>*>::iterator eend = m_expressionVectors.end();
495 for (HashSet<Vector<Expression*>*>::iterator it = m_expressionVectors.begin(); it != eend; ++it) {
496 deleteAllValues(**it);
497 delete *it;
498 }
499 m_expressionVectors.clear();
500
501 deleteAllValues(m_strings);
502 m_strings.clear();
503
504 deleteAllValues(m_nodeTests);
505 m_nodeTests.clear();
506
507 m_topExpr = 0;
508
509 if (m_gotNamespaceError)
510 ec = NAMESPACE_ERR;
511 else
512 ec = XPathException::INVALID_EXPRESSION_ERR;
513 return 0;
514 }
515
516 ASSERT(m_parseNodes.size() == 1);
517 ASSERT(*m_parseNodes.begin() == m_topExpr);
518 ASSERT(m_expressionVectors.size() == 0);
519 ASSERT(m_predicateVectors.size() == 0);
520 ASSERT(m_strings.size() == 0);
521 ASSERT(m_nodeTests.size() == 0);
522
523 m_parseNodes.clear();
524 Expression* result = m_topExpr;
525 m_topExpr = 0;
526
527 return result;
528 }
529
registerParseNode(ParseNode * node)530 void Parser::registerParseNode(ParseNode* node)
531 {
532 if (node == 0)
533 return;
534
535 ASSERT(!m_parseNodes.contains(node));
536
537 m_parseNodes.add(node);
538 }
539
unregisterParseNode(ParseNode * node)540 void Parser::unregisterParseNode(ParseNode* node)
541 {
542 if (node == 0)
543 return;
544
545 ASSERT(m_parseNodes.contains(node));
546
547 m_parseNodes.remove(node);
548 }
549
registerPredicateVector(Vector<Predicate * > * vector)550 void Parser::registerPredicateVector(Vector<Predicate*>* vector)
551 {
552 if (vector == 0)
553 return;
554
555 ASSERT(!m_predicateVectors.contains(vector));
556
557 m_predicateVectors.add(vector);
558 }
559
deletePredicateVector(Vector<Predicate * > * vector)560 void Parser::deletePredicateVector(Vector<Predicate*>* vector)
561 {
562 if (vector == 0)
563 return;
564
565 ASSERT(m_predicateVectors.contains(vector));
566
567 m_predicateVectors.remove(vector);
568 delete vector;
569 }
570
571
registerExpressionVector(Vector<Expression * > * vector)572 void Parser::registerExpressionVector(Vector<Expression*>* vector)
573 {
574 if (vector == 0)
575 return;
576
577 ASSERT(!m_expressionVectors.contains(vector));
578
579 m_expressionVectors.add(vector);
580 }
581
deleteExpressionVector(Vector<Expression * > * vector)582 void Parser::deleteExpressionVector(Vector<Expression*>* vector)
583 {
584 if (vector == 0)
585 return;
586
587 ASSERT(m_expressionVectors.contains(vector));
588
589 m_expressionVectors.remove(vector);
590 delete vector;
591 }
592
registerString(String * s)593 void Parser::registerString(String* s)
594 {
595 if (s == 0)
596 return;
597
598 ASSERT(!m_strings.contains(s));
599
600 m_strings.add(s);
601 }
602
deleteString(String * s)603 void Parser::deleteString(String* s)
604 {
605 if (s == 0)
606 return;
607
608 ASSERT(m_strings.contains(s));
609
610 m_strings.remove(s);
611 delete s;
612 }
613
registerNodeTest(Step::NodeTest * t)614 void Parser::registerNodeTest(Step::NodeTest* t)
615 {
616 if (t == 0)
617 return;
618
619 ASSERT(!m_nodeTests.contains(t));
620
621 m_nodeTests.add(t);
622 }
623
deleteNodeTest(Step::NodeTest * t)624 void Parser::deleteNodeTest(Step::NodeTest* t)
625 {
626 if (t == 0)
627 return;
628
629 ASSERT(m_nodeTests.contains(t));
630
631 m_nodeTests.remove(t);
632 delete t;
633 }
634
635 }
636 }
637
638 #endif // ENABLE(XPATH)
639