1 //------------------------------------------------------------------------------ 2 // <copyright file="XPathParser.cs" company="Microsoft"> 3 // Copyright (c) Microsoft Corporation. All rights reserved. 4 // </copyright> 5 // <owner current="true" primary="true">Microsoft</owner> 6 //------------------------------------------------------------------------------ 7 8 using System.Collections.Generic; 9 using System.Diagnostics; 10 11 namespace System.Xml.Xsl.XPath { 12 using Res = System.Xml.Utils.Res; 13 using XPathNodeType = System.Xml.XPath.XPathNodeType; 14 15 internal class XPathParser<Node> { 16 private XPathScanner scanner; 17 private IXPathBuilder<Node> builder; 18 private Stack<int> posInfo = new Stack<int>(); 19 20 // Six possible causes of exceptions in the builder: 21 // 1. Undefined prefix in a node test. 22 // 2. Undefined prefix in a variable reference, or unknown variable. 23 // 3. Undefined prefix in a function call, or unknown function, or wrong number/types of arguments. 24 // 4. Argument of Union operator is not a node-set. 25 // 5. First argument of Predicate is not a node-set. 26 // 6. Argument of Axis is not a node-set. 27 Parse(XPathScanner scanner, IXPathBuilder<Node> builder, LexKind endLex)28 public Node Parse(XPathScanner scanner, IXPathBuilder<Node> builder, LexKind endLex) { 29 Debug.Assert(this.scanner == null && this.builder == null); 30 Debug.Assert(scanner != null && builder != null); 31 32 Node result = default(Node); 33 this.scanner = scanner; 34 this.builder = builder; 35 this.posInfo.Clear(); 36 37 try { 38 builder.StartBuild(); 39 result = ParseExpr(); 40 scanner.CheckToken(endLex); 41 } 42 catch (XPathCompileException e) { 43 if (e.queryString == null) { 44 e.queryString = scanner.Source; 45 PopPosInfo(out e.startChar, out e.endChar); 46 } 47 throw; 48 } 49 finally { 50 result = builder.EndBuild(result); 51 #if DEBUG 52 this.builder = null; 53 this.scanner = null; 54 #endif 55 } 56 Debug.Assert(posInfo.Count == 0, "PushPosInfo() and PopPosInfo() calls have been unbalanced"); 57 return result; 58 } 59 60 #region Location paths and node tests 61 /**************************************************************************************************/ 62 /* Location paths and node tests */ 63 /**************************************************************************************************/ 64 IsStep(LexKind lexKind)65 internal static bool IsStep(LexKind lexKind) { 66 return ( 67 lexKind == LexKind.Dot || 68 lexKind == LexKind.DotDot || 69 lexKind == LexKind.At || 70 lexKind == LexKind.Axis || 71 lexKind == LexKind.Star || 72 lexKind == LexKind.Name // NodeTest is also Name 73 ); 74 } 75 76 /* 77 * LocationPath ::= RelativeLocationPath | '/' RelativeLocationPath? | '//' RelativeLocationPath 78 */ ParseLocationPath()79 private Node ParseLocationPath() { 80 if (scanner.Kind == LexKind.Slash) { 81 scanner.NextLex(); 82 Node opnd = builder.Axis(XPathAxis.Root, XPathNodeType.All, null, null); 83 84 if (IsStep(scanner.Kind)) { 85 opnd = builder.JoinStep(opnd, ParseRelativeLocationPath()); 86 } 87 return opnd; 88 } else if (scanner.Kind == LexKind.SlashSlash) { 89 scanner.NextLex(); 90 return builder.JoinStep( 91 builder.Axis(XPathAxis.Root, XPathNodeType.All, null, null), 92 builder.JoinStep( 93 builder.Axis(XPathAxis.DescendantOrSelf, XPathNodeType.All, null, null), 94 ParseRelativeLocationPath() 95 ) 96 ); 97 } else { 98 return ParseRelativeLocationPath(); 99 } 100 } 101 102 /* 103 * RelativeLocationPath ::= Step (('/' | '//') Step)* 104 */ 105 //Max depth to avoid StackOverflow 106 const int MaxParseRelativePathDepth = 1024; 107 private int parseRelativePath = 0; ParseRelativeLocationPath()108 private Node ParseRelativeLocationPath() { 109 if (++parseRelativePath > MaxParseRelativePathDepth) { 110 if (System.Xml.XmlConfiguration.XsltConfigSection.LimitXPathComplexity) { 111 throw scanner.CreateException(System.Xml.Utils.Res.Xslt_InputTooComplex); 112 } 113 } 114 Node opnd = ParseStep(); 115 if (scanner.Kind == LexKind.Slash) { 116 scanner.NextLex(); 117 opnd = builder.JoinStep(opnd, ParseRelativeLocationPath()); 118 } else if (scanner.Kind == LexKind.SlashSlash) { 119 scanner.NextLex(); 120 opnd = builder.JoinStep(opnd, 121 builder.JoinStep( 122 builder.Axis(XPathAxis.DescendantOrSelf, XPathNodeType.All, null, null), 123 ParseRelativeLocationPath() 124 ) 125 ); 126 } 127 --parseRelativePath; 128 return opnd; 129 } 130 131 /* 132 * Step ::= '.' | '..' | (AxisName '::' | '@')? NodeTest Predicate* 133 */ ParseStep()134 private Node ParseStep() { 135 Node opnd; 136 if (LexKind.Dot == scanner.Kind) { // '.' 137 scanner.NextLex(); 138 opnd = builder.Axis(XPathAxis.Self, XPathNodeType.All, null, null); 139 if (LexKind.LBracket == scanner.Kind) { 140 throw scanner.CreateException(Res.XPath_PredicateAfterDot); 141 } 142 } else if (LexKind.DotDot == scanner.Kind) { // '..' 143 scanner.NextLex(); 144 opnd = builder.Axis(XPathAxis.Parent, XPathNodeType.All, null, null); 145 if (LexKind.LBracket == scanner.Kind) { 146 throw scanner.CreateException(Res.XPath_PredicateAfterDotDot); 147 } 148 } else { // (AxisName '::' | '@')? NodeTest Predicate* 149 XPathAxis axis; 150 switch (scanner.Kind) { 151 case LexKind.Axis: // AxisName '::' 152 axis = scanner.Axis; 153 scanner.NextLex(); 154 scanner.NextLex(); 155 break; 156 case LexKind.At: // '@' 157 axis = XPathAxis.Attribute; 158 scanner.NextLex(); 159 break; 160 case LexKind.Name: 161 case LexKind.Star: 162 // NodeTest must start with Name or '*' 163 axis = XPathAxis.Child; 164 break; 165 default: 166 throw scanner.CreateException(Res.XPath_UnexpectedToken, scanner.RawValue); 167 } 168 169 opnd = ParseNodeTest(axis); 170 171 while (LexKind.LBracket == scanner.Kind) { 172 opnd = builder.Predicate(opnd, ParsePredicate(), IsReverseAxis(axis)); 173 } 174 } 175 return opnd; 176 } 177 IsReverseAxis(XPathAxis axis)178 private static bool IsReverseAxis(XPathAxis axis) { 179 return ( 180 axis == XPathAxis.Ancestor || axis == XPathAxis.Preceding || 181 axis == XPathAxis.AncestorOrSelf || axis == XPathAxis.PrecedingSibling 182 ); 183 } 184 185 /* 186 * NodeTest ::= NameTest | ('comment' | 'text' | 'node') '(' ')' | 'processing-instruction' '(' Literal? ')' 187 * NameTest ::= '*' | NCName ':' '*' | QName 188 */ ParseNodeTest(XPathAxis axis)189 private Node ParseNodeTest(XPathAxis axis) { 190 XPathNodeType nodeType; 191 string nodePrefix, nodeName; 192 193 int startChar = scanner.LexStart; 194 InternalParseNodeTest(scanner, axis, out nodeType, out nodePrefix, out nodeName); 195 PushPosInfo(startChar, scanner.PrevLexEnd); 196 Node result = builder.Axis(axis, nodeType, nodePrefix, nodeName); 197 PopPosInfo(); 198 return result; 199 } 200 IsNodeType(XPathScanner scanner)201 private static bool IsNodeType(XPathScanner scanner) { 202 return scanner.Prefix.Length == 0 && ( 203 scanner.Name == "node" || 204 scanner.Name == "text" || 205 scanner.Name == "processing-instruction" || 206 scanner.Name == "comment" 207 ); 208 } 209 PrincipalNodeType(XPathAxis axis)210 private static XPathNodeType PrincipalNodeType(XPathAxis axis) { 211 return ( 212 axis == XPathAxis.Attribute ? XPathNodeType.Attribute : 213 axis == XPathAxis.Namespace ? XPathNodeType.Namespace : 214 /*else*/ XPathNodeType.Element 215 ); 216 } 217 InternalParseNodeTest(XPathScanner scanner, XPathAxis axis, out XPathNodeType nodeType, out string nodePrefix, out string nodeName)218 internal static void InternalParseNodeTest(XPathScanner scanner, XPathAxis axis, out XPathNodeType nodeType, out string nodePrefix, out string nodeName) { 219 switch (scanner.Kind) { 220 case LexKind.Name : 221 if (scanner.CanBeFunction && IsNodeType(scanner)) { 222 nodePrefix = null; 223 nodeName = null; 224 switch (scanner.Name) { 225 case "comment": nodeType = XPathNodeType.Comment; break; 226 case "text" : nodeType = XPathNodeType.Text; break; 227 case "node" : nodeType = XPathNodeType.All; break; 228 default: 229 Debug.Assert(scanner.Name == "processing-instruction"); 230 nodeType = XPathNodeType.ProcessingInstruction; 231 break; 232 } 233 234 scanner.NextLex(); 235 scanner.PassToken(LexKind.LParens); 236 237 if (nodeType == XPathNodeType.ProcessingInstruction) { 238 if (scanner.Kind != LexKind.RParens) { // 'processing-instruction' '(' Literal ')' 239 scanner.CheckToken(LexKind.String); 240 // It is not needed to set nodePrefix here, but for our current implementation 241 // comparing whole QNames is faster than comparing just local names 242 nodePrefix = string.Empty; 243 nodeName = scanner.StringValue; 244 scanner.NextLex(); 245 } 246 } 247 248 scanner.PassToken(LexKind.RParens); 249 } else { 250 nodePrefix = scanner.Prefix; 251 nodeName = scanner.Name; 252 nodeType = PrincipalNodeType(axis); 253 scanner.NextLex(); 254 if (nodeName == "*") { 255 nodeName = null; 256 } 257 } 258 break; 259 case LexKind.Star : 260 nodePrefix = null; 261 nodeName = null; 262 nodeType = PrincipalNodeType(axis); 263 scanner.NextLex(); 264 break; 265 default : 266 throw scanner.CreateException(Res.XPath_NodeTestExpected, scanner.RawValue); 267 } 268 } 269 270 /* 271 * Predicate ::= '[' Expr ']' 272 */ ParsePredicate()273 private Node ParsePredicate() { 274 scanner.PassToken(LexKind.LBracket); 275 Node opnd = ParseExpr(); 276 scanner.PassToken(LexKind.RBracket); 277 return opnd; 278 } 279 #endregion 280 281 #region Expressions 282 /**************************************************************************************************/ 283 /* Expressions */ 284 /**************************************************************************************************/ 285 286 /* 287 * Expr ::= OrExpr 288 * OrExpr ::= AndExpr ('or' AndExpr)* 289 * AndExpr ::= EqualityExpr ('and' EqualityExpr)* 290 * EqualityExpr ::= RelationalExpr (('=' | '!=') RelationalExpr)* 291 * RelationalExpr ::= AdditiveExpr (('<' | '>' | '<=' | '>=') AdditiveExpr)* 292 * AdditiveExpr ::= MultiplicativeExpr (('+' | '-') MultiplicativeExpr)* 293 * MultiplicativeExpr ::= UnaryExpr (('*' | 'div' | 'mod') UnaryExpr)* 294 * UnaryExpr ::= ('-')* UnionExpr 295 */ ParseExpr()296 private Node ParseExpr() { 297 return ParseSubExpr(/*callerPrec:*/0); 298 } 299 300 //Max depth to avoid StackOverflow 301 //limit the recursive call from ParseSubExpr -> ParseSubExpr 302 //and also ParseSubExpr->ParseUnionExpr->ParsePathExpr->...->ParseExpr->ParseSubExpr 303 const int MaxParseSubExprDepth = 1024; 304 private int parseSubExprDepth = 0; ParseSubExpr(int callerPrec)305 private Node ParseSubExpr(int callerPrec) { 306 if (++parseSubExprDepth > MaxParseSubExprDepth) { 307 if (System.Xml.XmlConfiguration.XsltConfigSection.LimitXPathComplexity) { 308 throw scanner.CreateException(System.Xml.Utils.Res.Xslt_InputTooComplex); 309 } 310 } 311 312 XPathOperator op; 313 Node opnd; 314 315 // Check for unary operators 316 if (scanner.Kind == LexKind.Minus) { 317 op = XPathOperator.UnaryMinus; 318 int opPrec = XPathOperatorPrecedence[(int)op]; 319 scanner.NextLex(); 320 opnd = builder.Operator(op, ParseSubExpr(opPrec), default(Node)); 321 } else { 322 opnd = ParseUnionExpr(); 323 } 324 325 // Process binary operators 326 while (true) { 327 op = (scanner.Kind <= LexKind.LastOperator) ? (XPathOperator)scanner.Kind : XPathOperator.Unknown; 328 int opPrec = XPathOperatorPrecedence[(int)op]; 329 if (opPrec <= callerPrec) { 330 break; 331 } 332 333 // Operator's precedence is greater than the one of our caller, so process it here 334 scanner.NextLex(); 335 opnd = builder.Operator(op, opnd, ParseSubExpr(/*callerPrec:*/opPrec)); 336 } 337 --parseSubExprDepth; 338 return opnd; 339 } 340 341 private static int[] XPathOperatorPrecedence = { 342 /*Unknown */ 0, 343 /*Or */ 1, 344 /*And */ 2, 345 /*Eq */ 3, 346 /*Ne */ 3, 347 /*Lt */ 4, 348 /*Le */ 4, 349 /*Gt */ 4, 350 /*Ge */ 4, 351 /*Plus */ 5, 352 /*Minus */ 5, 353 /*Multiply */ 6, 354 /*Divide */ 6, 355 /*Modulo */ 6, 356 /*UnaryMinus */ 7, 357 /*Union */ 8, // Not used 358 }; 359 360 /* 361 * UnionExpr ::= PathExpr ('|' PathExpr)* 362 */ ParseUnionExpr()363 private Node ParseUnionExpr() { 364 int startChar = scanner.LexStart; 365 Node opnd1 = ParsePathExpr(); 366 367 if (scanner.Kind == LexKind.Union) { 368 PushPosInfo(startChar, scanner.PrevLexEnd); 369 opnd1 = builder.Operator(XPathOperator.Union, default(Node), opnd1); 370 PopPosInfo(); 371 372 while (scanner.Kind == LexKind.Union) { 373 scanner.NextLex(); 374 startChar = scanner.LexStart; 375 Node opnd2 = ParsePathExpr(); 376 PushPosInfo(startChar, scanner.PrevLexEnd); 377 opnd1 = builder.Operator(XPathOperator.Union, opnd1, opnd2); 378 PopPosInfo(); 379 } 380 } 381 return opnd1; 382 } 383 384 /* 385 * PathExpr ::= LocationPath | FilterExpr (('/' | '//') RelativeLocationPath )? 386 */ ParsePathExpr()387 private Node ParsePathExpr() { 388 // Here we distinguish FilterExpr from LocationPath - the former starts with PrimaryExpr 389 if (IsPrimaryExpr()) { 390 int startChar = scanner.LexStart; 391 Node opnd = ParseFilterExpr(); 392 int endChar = scanner.PrevLexEnd; 393 394 if (scanner.Kind == LexKind.Slash) { 395 scanner.NextLex(); 396 PushPosInfo(startChar, endChar); 397 opnd = builder.JoinStep(opnd, ParseRelativeLocationPath()); 398 PopPosInfo(); 399 } else if (scanner.Kind == LexKind.SlashSlash) { 400 scanner.NextLex(); 401 PushPosInfo(startChar, endChar); 402 opnd = builder.JoinStep(opnd, 403 builder.JoinStep( 404 builder.Axis(XPathAxis.DescendantOrSelf, XPathNodeType.All, null, null), 405 ParseRelativeLocationPath() 406 ) 407 ); 408 PopPosInfo(); 409 } 410 return opnd; 411 } else { 412 return ParseLocationPath(); 413 } 414 } 415 416 /* 417 * FilterExpr ::= PrimaryExpr Predicate* 418 */ ParseFilterExpr()419 private Node ParseFilterExpr() { 420 int startChar = scanner.LexStart; 421 Node opnd = ParsePrimaryExpr(); 422 int endChar = scanner.PrevLexEnd; 423 424 while (scanner.Kind == LexKind.LBracket) { 425 PushPosInfo(startChar, endChar); 426 opnd = builder.Predicate(opnd, ParsePredicate(), /*reverseStep:*/false); 427 PopPosInfo(); 428 } 429 return opnd; 430 } 431 IsPrimaryExpr()432 private bool IsPrimaryExpr() { 433 return ( 434 scanner.Kind == LexKind.String || 435 scanner.Kind == LexKind.Number || 436 scanner.Kind == LexKind.Dollar || 437 scanner.Kind == LexKind.LParens || 438 scanner.Kind == LexKind.Name && scanner.CanBeFunction && !IsNodeType(scanner) 439 ); 440 } 441 442 /* 443 * PrimaryExpr ::= Literal | Number | VariableReference | '(' Expr ')' | FunctionCall 444 */ ParsePrimaryExpr()445 private Node ParsePrimaryExpr() { 446 Debug.Assert(IsPrimaryExpr()); 447 Node opnd; 448 switch (scanner.Kind) { 449 case LexKind.String: 450 opnd = builder.String(scanner.StringValue); 451 scanner.NextLex(); 452 break; 453 case LexKind.Number: 454 opnd = builder.Number(XPathConvert.StringToDouble(scanner.RawValue)); 455 scanner.NextLex(); 456 break; 457 case LexKind.Dollar: 458 int startChar = scanner.LexStart; 459 scanner.NextLex(); 460 scanner.CheckToken(LexKind.Name); 461 PushPosInfo(startChar, scanner.LexStart + scanner.LexSize); 462 opnd = builder.Variable(scanner.Prefix, scanner.Name); 463 PopPosInfo(); 464 scanner.NextLex(); 465 break; 466 case LexKind.LParens: 467 scanner.NextLex(); 468 opnd = ParseExpr(); 469 scanner.PassToken(LexKind.RParens); 470 break; 471 default: 472 Debug.Assert( 473 scanner.Kind == LexKind.Name && scanner.CanBeFunction && !IsNodeType(scanner), 474 "IsPrimaryExpr() returned true, but the lexeme is not recognized" 475 ); 476 opnd = ParseFunctionCall(); 477 break; 478 } 479 return opnd; 480 } 481 482 /* 483 * FunctionCall ::= FunctionName '(' (Expr (',' Expr)* )? ')' 484 */ ParseFunctionCall()485 private Node ParseFunctionCall() { 486 List<Node> argList = new List<Node>(); 487 string name = scanner.Name; 488 string prefix = scanner.Prefix; 489 int startChar = scanner.LexStart; 490 491 scanner.PassToken(LexKind.Name); 492 scanner.PassToken(LexKind.LParens); 493 494 if (scanner.Kind != LexKind.RParens) { 495 while (true) { 496 argList.Add(ParseExpr()); 497 if (scanner.Kind != LexKind.Comma) { 498 scanner.CheckToken(LexKind.RParens); 499 break; 500 } 501 scanner.NextLex(); // move off the ',' 502 } 503 } 504 505 scanner.NextLex(); // move off the ')' 506 PushPosInfo(startChar, scanner.PrevLexEnd); 507 Node result = builder.Function(prefix, name, argList); 508 PopPosInfo(); 509 return result; 510 } 511 #endregion 512 513 /**************************************************************************************************/ 514 /* Helper methods */ 515 /**************************************************************************************************/ 516 PushPosInfo(int startChar, int endChar)517 private void PushPosInfo(int startChar, int endChar) { 518 posInfo.Push(startChar); 519 posInfo.Push(endChar); 520 } 521 PopPosInfo()522 private void PopPosInfo() { 523 posInfo.Pop(); 524 posInfo.Pop(); 525 } 526 PopPosInfo(out int startChar, out int endChar)527 private void PopPosInfo(out int startChar, out int endChar) { 528 endChar = posInfo.Pop(); 529 startChar = posInfo.Pop(); 530 } 531 } 532 } 533