1 //------------------------------------------------------------------------------
2 // <copyright file="XPathParser.cs" company="Microsoft">
3 //     Copyright (c) Microsoft Corporation.  All rights reserved.
4 // </copyright>
5 // <owner current="true" primary="true">Microsoft</owner>
6 //------------------------------------------------------------------------------
7 
8 using System.Collections.Generic;
9 using System.Diagnostics;
10 
11 namespace System.Xml.Xsl.XPath {
12     using Res           = System.Xml.Utils.Res;
13     using XPathNodeType = System.Xml.XPath.XPathNodeType;
14 
15     internal class XPathParser<Node> {
16         private XPathScanner        scanner;
17         private IXPathBuilder<Node> builder;
18         private Stack<int>          posInfo = new Stack<int>();
19 
20         // Six possible causes of exceptions in the builder:
21         // 1. Undefined prefix in a node test.
22         // 2. Undefined prefix in a variable reference, or unknown variable.
23         // 3. Undefined prefix in a function call, or unknown function, or wrong number/types of arguments.
24         // 4. Argument of Union operator is not a node-set.
25         // 5. First argument of Predicate is not a node-set.
26         // 6. Argument of Axis is not a node-set.
27 
Parse(XPathScanner scanner, IXPathBuilder<Node> builder, LexKind endLex)28         public Node Parse(XPathScanner scanner, IXPathBuilder<Node> builder, LexKind endLex) {
29             Debug.Assert(this.scanner == null && this.builder == null);
30             Debug.Assert(scanner != null && builder != null);
31 
32             Node result     = default(Node);
33             this.scanner    = scanner;
34             this.builder    = builder;
35             this.posInfo.Clear();
36 
37             try {
38                 builder.StartBuild();
39                 result = ParseExpr();
40                 scanner.CheckToken(endLex);
41             }
42             catch (XPathCompileException e) {
43                 if (e.queryString == null) {
44                     e.queryString = scanner.Source;
45                     PopPosInfo(out e.startChar, out e.endChar);
46                 }
47                 throw;
48             }
49             finally {
50                 result = builder.EndBuild(result);
51 #if DEBUG
52                 this.builder = null;
53                 this.scanner = null;
54 #endif
55             }
56             Debug.Assert(posInfo.Count == 0, "PushPosInfo() and PopPosInfo() calls have been unbalanced");
57             return result;
58         }
59 
60         #region Location paths and node tests
61         /**************************************************************************************************/
62         /*  Location paths and node tests                                                                 */
63         /**************************************************************************************************/
64 
IsStep(LexKind lexKind)65         internal static bool IsStep(LexKind lexKind) {
66             return (
67                 lexKind == LexKind.Dot    ||
68                 lexKind == LexKind.DotDot ||
69                 lexKind == LexKind.At     ||
70                 lexKind == LexKind.Axis   ||
71                 lexKind == LexKind.Star   ||
72                 lexKind == LexKind.Name   // NodeTest is also Name
73             );
74         }
75 
76         /*
77         *   LocationPath ::= RelativeLocationPath | '/' RelativeLocationPath? | '//' RelativeLocationPath
78         */
ParseLocationPath()79         private Node ParseLocationPath() {
80             if (scanner.Kind == LexKind.Slash) {
81                 scanner.NextLex();
82                 Node opnd = builder.Axis(XPathAxis.Root, XPathNodeType.All, null, null);
83 
84                 if (IsStep(scanner.Kind)) {
85                     opnd = builder.JoinStep(opnd, ParseRelativeLocationPath());
86                 }
87                 return opnd;
88             } else if (scanner.Kind == LexKind.SlashSlash) {
89                 scanner.NextLex();
90                 return builder.JoinStep(
91                     builder.Axis(XPathAxis.Root, XPathNodeType.All, null, null),
92                     builder.JoinStep(
93                         builder.Axis(XPathAxis.DescendantOrSelf, XPathNodeType.All, null, null),
94                         ParseRelativeLocationPath()
95                     )
96                 );
97             } else {
98                 return ParseRelativeLocationPath();
99             }
100         }
101 
102         /*
103         *   RelativeLocationPath ::= Step (('/' | '//') Step)*
104         */
105         //Max depth to avoid StackOverflow
106         const int MaxParseRelativePathDepth = 1024;
107         private int parseRelativePath = 0;
ParseRelativeLocationPath()108         private Node ParseRelativeLocationPath() {
109             if (++parseRelativePath > MaxParseRelativePathDepth) {
110                 if (System.Xml.XmlConfiguration.XsltConfigSection.LimitXPathComplexity) {
111                     throw scanner.CreateException(System.Xml.Utils.Res.Xslt_InputTooComplex);
112                 }
113             }
114             Node opnd = ParseStep();
115             if (scanner.Kind == LexKind.Slash) {
116                 scanner.NextLex();
117                 opnd = builder.JoinStep(opnd, ParseRelativeLocationPath());
118             } else if (scanner.Kind == LexKind.SlashSlash) {
119                 scanner.NextLex();
120                 opnd = builder.JoinStep(opnd,
121                     builder.JoinStep(
122                         builder.Axis(XPathAxis.DescendantOrSelf, XPathNodeType.All, null, null),
123                         ParseRelativeLocationPath()
124                     )
125                 );
126             }
127             --parseRelativePath;
128             return opnd;
129         }
130 
131         /*
132         *   Step ::= '.' | '..' | (AxisName '::' | '@')? NodeTest Predicate*
133         */
ParseStep()134         private Node ParseStep() {
135             Node opnd;
136             if (LexKind.Dot == scanner.Kind) {                  // '.'
137                 scanner.NextLex();
138                 opnd = builder.Axis(XPathAxis.Self, XPathNodeType.All, null, null);
139                 if (LexKind.LBracket == scanner.Kind) {
140                     throw scanner.CreateException(Res.XPath_PredicateAfterDot);
141                 }
142             } else if (LexKind.DotDot == scanner.Kind) {        // '..'
143                 scanner.NextLex();
144                 opnd = builder.Axis(XPathAxis.Parent, XPathNodeType.All, null, null);
145                 if (LexKind.LBracket == scanner.Kind) {
146                     throw scanner.CreateException(Res.XPath_PredicateAfterDotDot);
147                 }
148             } else {                                            // (AxisName '::' | '@')? NodeTest Predicate*
149                 XPathAxis axis;
150                 switch (scanner.Kind) {
151                 case LexKind.Axis:                              // AxisName '::'
152                     axis = scanner.Axis;
153                     scanner.NextLex();
154                     scanner.NextLex();
155                     break;
156                 case LexKind.At:                                // '@'
157                     axis = XPathAxis.Attribute;
158                     scanner.NextLex();
159                     break;
160                 case LexKind.Name:
161                 case LexKind.Star:
162                     // NodeTest must start with Name or '*'
163                     axis = XPathAxis.Child;
164                     break;
165                 default:
166                     throw scanner.CreateException(Res.XPath_UnexpectedToken, scanner.RawValue);
167                 }
168 
169                 opnd = ParseNodeTest(axis);
170 
171                 while (LexKind.LBracket == scanner.Kind) {
172                     opnd = builder.Predicate(opnd, ParsePredicate(), IsReverseAxis(axis));
173                 }
174             }
175             return opnd;
176         }
177 
IsReverseAxis(XPathAxis axis)178         private static bool IsReverseAxis(XPathAxis axis) {
179             return (
180                 axis == XPathAxis.Ancestor       || axis == XPathAxis.Preceding ||
181                 axis == XPathAxis.AncestorOrSelf || axis == XPathAxis.PrecedingSibling
182             );
183         }
184 
185         /*
186         *   NodeTest ::= NameTest | ('comment' | 'text' | 'node') '(' ')' | 'processing-instruction' '('  Literal? ')'
187         *   NameTest ::= '*' | NCName ':' '*' | QName
188         */
ParseNodeTest(XPathAxis axis)189         private Node ParseNodeTest(XPathAxis axis) {
190             XPathNodeType nodeType;
191             string        nodePrefix, nodeName;
192 
193             int startChar = scanner.LexStart;
194             InternalParseNodeTest(scanner, axis, out nodeType, out nodePrefix, out nodeName);
195             PushPosInfo(startChar, scanner.PrevLexEnd);
196             Node result = builder.Axis(axis, nodeType, nodePrefix, nodeName);
197             PopPosInfo();
198             return result;
199         }
200 
IsNodeType(XPathScanner scanner)201         private static bool IsNodeType(XPathScanner scanner) {
202             return scanner.Prefix.Length == 0 && (
203                 scanner.Name == "node"                   ||
204                 scanner.Name == "text"                   ||
205                 scanner.Name == "processing-instruction" ||
206                 scanner.Name == "comment"
207             );
208         }
209 
PrincipalNodeType(XPathAxis axis)210         private static XPathNodeType PrincipalNodeType(XPathAxis axis) {
211             return (
212                 axis == XPathAxis.Attribute ? XPathNodeType.Attribute :
213                 axis == XPathAxis.Namespace ? XPathNodeType.Namespace :
214                 /*else*/                      XPathNodeType.Element
215             );
216         }
217 
InternalParseNodeTest(XPathScanner scanner, XPathAxis axis, out XPathNodeType nodeType, out string nodePrefix, out string nodeName)218         internal static void InternalParseNodeTest(XPathScanner scanner, XPathAxis axis, out XPathNodeType nodeType, out string nodePrefix, out string nodeName) {
219             switch (scanner.Kind) {
220             case LexKind.Name :
221                 if (scanner.CanBeFunction && IsNodeType(scanner)) {
222                     nodePrefix = null;
223                     nodeName   = null;
224                     switch (scanner.Name) {
225                     case "comment": nodeType = XPathNodeType.Comment; break;
226                     case "text"   : nodeType = XPathNodeType.Text;    break;
227                     case "node"   : nodeType = XPathNodeType.All;     break;
228                     default:
229                         Debug.Assert(scanner.Name == "processing-instruction");
230                         nodeType = XPathNodeType.ProcessingInstruction;
231                         break;
232                     }
233 
234                     scanner.NextLex();
235                     scanner.PassToken(LexKind.LParens);
236 
237                     if (nodeType == XPathNodeType.ProcessingInstruction) {
238                         if (scanner.Kind != LexKind.RParens) {  // 'processing-instruction' '(' Literal ')'
239                             scanner.CheckToken(LexKind.String);
240                             // It is not needed to set nodePrefix here, but for our current implementation
241                             // comparing whole QNames is faster than comparing just local names
242                             nodePrefix = string.Empty;
243                             nodeName   = scanner.StringValue;
244                             scanner.NextLex();
245                         }
246                     }
247 
248                     scanner.PassToken(LexKind.RParens);
249                 } else {
250                     nodePrefix = scanner.Prefix;
251                     nodeName   = scanner.Name;
252                     nodeType   = PrincipalNodeType(axis);
253                     scanner.NextLex();
254                     if (nodeName == "*") {
255                         nodeName = null;
256                     }
257                 }
258                 break;
259             case LexKind.Star :
260                 nodePrefix = null;
261                 nodeName   = null;
262                 nodeType   = PrincipalNodeType(axis);
263                 scanner.NextLex();
264                 break;
265             default :
266                 throw scanner.CreateException(Res.XPath_NodeTestExpected, scanner.RawValue);
267             }
268         }
269 
270         /*
271         *   Predicate ::= '[' Expr ']'
272         */
ParsePredicate()273         private Node ParsePredicate() {
274             scanner.PassToken(LexKind.LBracket);
275             Node opnd = ParseExpr();
276             scanner.PassToken(LexKind.RBracket);
277             return opnd;
278         }
279         #endregion
280 
281         #region Expressions
282         /**************************************************************************************************/
283         /*  Expressions                                                                                   */
284         /**************************************************************************************************/
285 
286         /*
287         *   Expr   ::= OrExpr
288         *   OrExpr ::= AndExpr ('or' AndExpr)*
289         *   AndExpr ::= EqualityExpr ('and' EqualityExpr)*
290         *   EqualityExpr ::= RelationalExpr (('=' | '!=') RelationalExpr)*
291         *   RelationalExpr ::= AdditiveExpr (('<' | '>' | '<=' | '>=') AdditiveExpr)*
292         *   AdditiveExpr ::= MultiplicativeExpr (('+' | '-') MultiplicativeExpr)*
293         *   MultiplicativeExpr ::= UnaryExpr (('*' | 'div' | 'mod') UnaryExpr)*
294         *   UnaryExpr ::= ('-')* UnionExpr
295         */
ParseExpr()296         private Node ParseExpr() {
297             return ParseSubExpr(/*callerPrec:*/0);
298         }
299 
300         //Max depth to avoid StackOverflow
301         //limit the recursive call from ParseSubExpr -> ParseSubExpr
302         //and also ParseSubExpr->ParseUnionExpr->ParsePathExpr->...->ParseExpr->ParseSubExpr
303         const int MaxParseSubExprDepth = 1024;
304         private int parseSubExprDepth = 0;
ParseSubExpr(int callerPrec)305         private Node ParseSubExpr(int callerPrec) {
306             if (++parseSubExprDepth > MaxParseSubExprDepth) {
307                 if (System.Xml.XmlConfiguration.XsltConfigSection.LimitXPathComplexity) {
308                     throw scanner.CreateException(System.Xml.Utils.Res.Xslt_InputTooComplex);
309                 }
310             }
311 
312             XPathOperator op;
313             Node opnd;
314 
315             // Check for unary operators
316             if (scanner.Kind == LexKind.Minus) {
317                 op = XPathOperator.UnaryMinus;
318                 int opPrec = XPathOperatorPrecedence[(int)op];
319                 scanner.NextLex();
320                 opnd = builder.Operator(op, ParseSubExpr(opPrec), default(Node));
321             } else {
322                 opnd = ParseUnionExpr();
323             }
324 
325             // Process binary operators
326             while (true) {
327                 op = (scanner.Kind <= LexKind.LastOperator) ? (XPathOperator)scanner.Kind : XPathOperator.Unknown;
328                 int opPrec = XPathOperatorPrecedence[(int)op];
329                 if (opPrec <= callerPrec) {
330                     break;
331                 }
332 
333                 // Operator's precedence is greater than the one of our caller, so process it here
334                 scanner.NextLex();
335                 opnd = builder.Operator(op, opnd, ParseSubExpr(/*callerPrec:*/opPrec));
336             }
337             --parseSubExprDepth;
338             return opnd;
339         }
340 
341         private static int[] XPathOperatorPrecedence = {
342             /*Unknown    */ 0,
343             /*Or         */ 1,
344             /*And        */ 2,
345             /*Eq         */ 3,
346             /*Ne         */ 3,
347             /*Lt         */ 4,
348             /*Le         */ 4,
349             /*Gt         */ 4,
350             /*Ge         */ 4,
351             /*Plus       */ 5,
352             /*Minus      */ 5,
353             /*Multiply   */ 6,
354             /*Divide     */ 6,
355             /*Modulo     */ 6,
356             /*UnaryMinus */ 7,
357             /*Union      */ 8,  // Not used
358         };
359 
360         /*
361         *   UnionExpr ::= PathExpr ('|' PathExpr)*
362         */
ParseUnionExpr()363         private Node ParseUnionExpr() {
364             int startChar = scanner.LexStart;
365             Node opnd1 = ParsePathExpr();
366 
367             if (scanner.Kind == LexKind.Union) {
368                 PushPosInfo(startChar, scanner.PrevLexEnd);
369                 opnd1 = builder.Operator(XPathOperator.Union, default(Node), opnd1);
370                 PopPosInfo();
371 
372                 while (scanner.Kind == LexKind.Union) {
373                     scanner.NextLex();
374                     startChar = scanner.LexStart;
375                     Node opnd2 = ParsePathExpr();
376                     PushPosInfo(startChar, scanner.PrevLexEnd);
377                     opnd1 = builder.Operator(XPathOperator.Union, opnd1, opnd2);
378                     PopPosInfo();
379                 }
380             }
381             return opnd1;
382         }
383 
384         /*
385         *   PathExpr ::= LocationPath | FilterExpr (('/' | '//') RelativeLocationPath )?
386         */
ParsePathExpr()387         private Node ParsePathExpr() {
388             // Here we distinguish FilterExpr from LocationPath - the former starts with PrimaryExpr
389             if (IsPrimaryExpr()) {
390                 int startChar = scanner.LexStart;
391                 Node opnd = ParseFilterExpr();
392                 int endChar = scanner.PrevLexEnd;
393 
394                 if (scanner.Kind == LexKind.Slash) {
395                     scanner.NextLex();
396                     PushPosInfo(startChar, endChar);
397                     opnd = builder.JoinStep(opnd, ParseRelativeLocationPath());
398                     PopPosInfo();
399                 } else if (scanner.Kind == LexKind.SlashSlash) {
400                     scanner.NextLex();
401                     PushPosInfo(startChar, endChar);
402                     opnd = builder.JoinStep(opnd,
403                         builder.JoinStep(
404                             builder.Axis(XPathAxis.DescendantOrSelf, XPathNodeType.All, null, null),
405                             ParseRelativeLocationPath()
406                         )
407                     );
408                     PopPosInfo();
409                 }
410                 return opnd;
411             } else {
412                 return ParseLocationPath();
413             }
414         }
415 
416         /*
417         *   FilterExpr ::= PrimaryExpr Predicate*
418         */
ParseFilterExpr()419         private Node ParseFilterExpr() {
420             int startChar = scanner.LexStart;
421             Node opnd = ParsePrimaryExpr();
422             int endChar = scanner.PrevLexEnd;
423 
424             while (scanner.Kind == LexKind.LBracket) {
425                 PushPosInfo(startChar, endChar);
426                 opnd = builder.Predicate(opnd, ParsePredicate(), /*reverseStep:*/false);
427                 PopPosInfo();
428             }
429             return opnd;
430         }
431 
IsPrimaryExpr()432         private bool IsPrimaryExpr() {
433             return (
434                 scanner.Kind == LexKind.String  ||
435                 scanner.Kind == LexKind.Number  ||
436                 scanner.Kind == LexKind.Dollar  ||
437                 scanner.Kind == LexKind.LParens ||
438                 scanner.Kind == LexKind.Name && scanner.CanBeFunction && !IsNodeType(scanner)
439             );
440         }
441 
442         /*
443         *   PrimaryExpr ::= Literal | Number | VariableReference | '(' Expr ')' | FunctionCall
444         */
ParsePrimaryExpr()445         private Node ParsePrimaryExpr() {
446             Debug.Assert(IsPrimaryExpr());
447             Node opnd;
448             switch (scanner.Kind) {
449             case LexKind.String:
450                 opnd = builder.String(scanner.StringValue);
451                 scanner.NextLex();
452                 break;
453             case LexKind.Number:
454                 opnd = builder.Number(XPathConvert.StringToDouble(scanner.RawValue));
455                 scanner.NextLex();
456                 break;
457             case LexKind.Dollar:
458                 int startChar = scanner.LexStart;
459                 scanner.NextLex();
460                 scanner.CheckToken(LexKind.Name);
461                 PushPosInfo(startChar, scanner.LexStart + scanner.LexSize);
462                 opnd = builder.Variable(scanner.Prefix, scanner.Name);
463                 PopPosInfo();
464                 scanner.NextLex();
465                 break;
466             case LexKind.LParens:
467                 scanner.NextLex();
468                 opnd = ParseExpr();
469                 scanner.PassToken(LexKind.RParens);
470                 break;
471             default:
472                 Debug.Assert(
473                     scanner.Kind == LexKind.Name && scanner.CanBeFunction && !IsNodeType(scanner),
474                     "IsPrimaryExpr() returned true, but the lexeme is not recognized"
475                 );
476                 opnd = ParseFunctionCall();
477                 break;
478             }
479             return opnd;
480         }
481 
482         /*
483         *   FunctionCall ::= FunctionName '(' (Expr (',' Expr)* )? ')'
484         */
ParseFunctionCall()485         private Node ParseFunctionCall() {
486             List<Node> argList = new List<Node>();
487             string name   = scanner.Name;
488             string prefix = scanner.Prefix;
489             int startChar = scanner.LexStart;
490 
491             scanner.PassToken(LexKind.Name);
492             scanner.PassToken(LexKind.LParens);
493 
494             if (scanner.Kind != LexKind.RParens) {
495                 while (true) {
496                     argList.Add(ParseExpr());
497                     if (scanner.Kind != LexKind.Comma) {
498                         scanner.CheckToken(LexKind.RParens);
499                         break;
500                     }
501                     scanner.NextLex();  // move off the ','
502                 }
503             }
504 
505             scanner.NextLex();          // move off the ')'
506             PushPosInfo(startChar, scanner.PrevLexEnd);
507             Node result = builder.Function(prefix, name, argList);
508             PopPosInfo();
509             return result;
510         }
511         #endregion
512 
513         /**************************************************************************************************/
514         /*  Helper methods                                                                                */
515         /**************************************************************************************************/
516 
PushPosInfo(int startChar, int endChar)517         private void PushPosInfo(int startChar, int endChar) {
518             posInfo.Push(startChar);
519             posInfo.Push(endChar);
520         }
521 
PopPosInfo()522         private void PopPosInfo() {
523             posInfo.Pop();
524             posInfo.Pop();
525         }
526 
PopPosInfo(out int startChar, out int endChar)527         private void PopPosInfo(out int startChar, out int endChar) {
528             endChar   = posInfo.Pop();
529             startChar = posInfo.Pop();
530         }
531     }
532 }
533